### Get familiar with built-in functions

#### Find unique elements inside a list

In [1]:
# Build a reproducible list of one million random integers in [0, 50]
import random
random.seed(666)  # fixed seed, so re-running this cell yields the same list
a_long_list = [random.randint(0, 50) for _ in range(1_000_000)]

In [2]:
%%time 

# Clumsy: de-duplicate by hand, scanning the result list for every element
unique = []
for value in a_long_list:
    if value in unique:
        continue  # already recorded
    unique.append(value)

CPU times: user 307 ms, sys: 1.41 ms, total: 308 ms
Wall time: 309 ms


In [3]:
%%time

# Good: a set keeps a single copy of each value; list() turns it back into a list
unique = list(set(a_long_list))

CPU times: user 8.74 ms, sys: 81 µs, total: 8.82 ms
Wall time: 8.81 ms


#### Sum up all values in a list

In [4]:
%%time 
# Clumsy: accumulate the total by hand, one element at a time
sum_value = 0
for value in a_long_list:
    sum_value = sum_value + value
print(sum_value)

25023368
CPU times: user 94.2 ms, sys: 1.13 ms, total: 95.3 ms
Wall time: 94.8 ms


In [5]:
%%time
# Good: the builtin sum() adds up all elements in a single call
sum_value = sum(a_long_list)
print(sum_value)

25023368
CPU times: user 4.55 ms, sys: 27 µs, total: 4.58 ms
Wall time: 4.57 ms


### `sort()` vs `sorted()`

#### Basic sorting

In [16]:
%%time
# sorted() returns a new sorted list and leaves a_long_list itself unchanged
sorted(a_long_list)

CPU times: user 12 ms, sys: 2.51 ms, total: 14.5 ms
Wall time: 14.2 ms


[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,


In [17]:
%%time
# list.sort() sorts in place and returns None; a_long_list stays sorted for every later cell
a_long_list.sort()

CPU times: user 8.52 ms, sys: 82 µs, total: 8.6 ms
Wall time: 10 ms


#### Sorting with `key`

In [12]:
%%time
# Case-insensitive sort with sorted(): a new list is returned, str_list1 keeps its order
sentence = "Although both functions can sort list, there are small differences"
str_list1 = sentence.split()
sorted(str_list1, key=str.lower)

CPU times: user 29 µs, sys: 0 ns, total: 29 µs
Wall time: 32.9 µs


['Although',
 'are',
 'both',
 'can',
 'differences',
 'functions',
 'list,',
 'small',
 'sort',
 'there']

In [13]:
%%time
# Case-insensitive sort with list.sort(): str_list2 itself is reordered, nothing is returned
sentence = "Although both functions can sort list, there are small differences"
str_list2 = sentence.split()
str_list2.sort(key=str.lower)

CPU times: user 26 µs, sys: 0 ns, total: 26 µs
Wall time: 29.8 µs


In [14]:
%%time
# Sort by word length with sorted(); words of equal length keep their original relative order.
# The lambda parameter is called `word` so that it does not shadow the builtin `str`.
str_list1 = "Although both functions can sort list, there are small differences".split()
sorted(str_list1, key=lambda word: len(word))

CPU times: user 61 µs, sys: 3 µs, total: 64 µs
Wall time: 59.8 µs


['can',
 'are',
 'both',
 'sort',
 'list,',
 'there',
 'small',
 'Although',
 'functions',
 'differences']

In [15]:
%%time
# Sort by word length in place with list.sort(); words of equal length keep their relative order.
# The lambda parameter is called `word` so that it does not shadow the builtin `str`.
str_list2 = "Although both functions can sort list, there are small differences".split()
str_list2.sort(key=lambda word: len(word))

CPU times: user 36 µs, sys: 0 ns, total: 36 µs
Wall time: 38.9 µs


#### `sorted()` for other iterables

In [2]:
# Small sample dictionary; keys are in alphabetical order, values are not in numeric order
a_dict = {
    'A': 1,
    'B': 3,
    'C': 2,
    'D': 4,
    'E': 5,
}

In [50]:
%%time
# Iterating over a dict yields its keys, so sorted() returns the keys as a sorted list
sorted(a_dict)

CPU times: user 4 µs, sys: 0 ns, total: 4 µs
Wall time: 6.91 µs


['A', 'B', 'C', 'D', 'E']

In [51]:
%%time
# Sort the (key, value) pairs by value (item[1]); the result is a list of tuples, not a dict
sorted(a_dict.items(), key=lambda item: item[1])

CPU times: user 7 µs, sys: 0 ns, total: 7 µs
Wall time: 8.82 µs


[('A', 1), ('C', 2), ('B', 3), ('D', 4), ('E', 5)]

In [52]:
%%time
# Sort the items by value, then rebuild a dictionary from the sorted pairs
# (dicts keep insertion order in Python 3.7+, so the new dict is ordered by value)
{key: value for key, value in sorted(a_dict.items(), key=lambda item: item[1])}

CPU times: user 8 µs, sys: 0 ns, total: 8 µs
Wall time: 11.2 µs


{'A': 1, 'C': 2, 'B': 3, 'D': 4, 'E': 5}

### Use symbols instead of their names

In [7]:
%%time
# Sort by value and re-pack as a dictionary, starting from an empty dict
# created by calling the constructor by name: dict()
sorted_dict1 = dict()
for key, value in sorted(a_dict.items(), key=lambda item: item[1]):
    sorted_dict1[key] = value

CPU times: user 10 µs, sys: 0 ns, total: 10 µs
Wall time: 12.2 µs


In [11]:
%%time
# Sort by value and re-pack as a dictionary, starting from an empty dict
# created with the literal symbol: {}
sorted_dict2 = {}
for key, value in sorted(a_dict.items(), key=lambda item: item[1]):
    sorted_dict2[key] = value

CPU times: user 9 µs, sys: 0 ns, total: 9 µs
Wall time: 11 µs


In [12]:
%%time
# Create an empty list by calling the constructor by name
list()

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 3.81 µs


[]

In [19]:
%%time
# Create an empty list with the literal symbol
[]

CPU times: user 2 µs, sys: 1e+03 ns, total: 3 µs
Wall time: 3.1 µs


[]

### List comprehension

In [88]:
# Build a second reproducible list: one million random integers in [0, 500]
import random
random.seed(666)  # fixed seed, so re-running this cell yields the same list
another_long_list = [random.randint(0, 500) for _ in range(1_000_000)]

In [22]:
%%time
# Clumsy: collect the even numbers with an explicit loop and append()
even_num = []
for number in another_long_list:
    if number % 2:
        continue  # odd number, skip it
    even_num.append(number)

CPU times: user 113 ms, sys: 3.55 ms, total: 117 ms
Wall time: 117 ms


In [24]:
%%time
# Good: a list comprehension filters the even numbers in a single expression
even_num = [number for number in another_long_list if number % 2 == 0]

CPU times: user 56.6 ms, sys: 3.73 ms, total: 60.3 ms
Wall time: 64.8 ms


In [9]:
%%time
# Sort by value and re-pack as a dictionary in one step with a dict comprehension
sorted_dict3 = {key: value for key, value in sorted(a_dict.items(), key=lambda item: item[1])}

CPU times: user 8 µs, sys: 0 ns, total: 8 µs
Wall time: 10 µs


### Use `enumerate()` for value and index

In [40]:
# Build a short reproducible list: five random integers in [0, 500]
import random
random.seed(666)  # fixed seed, so re-running this cell yields the same list
a_short_list = [random.randint(0, 500) for _ in range(5)]

In [43]:
%%time
# Clumsy: loop over the indices and look each value up by position
for i in range(len(a_short_list)):
    print(f'number {i} is {a_short_list[i]}')

number 0 is 233
number 1 is 462
number 2 is 193
number 3 is 222
number 4 is 145
CPU times: user 189 µs, sys: 123 µs, total: 312 µs
Wall time: 214 µs


In [44]:
%%time
# Good: enumerate() yields (index, value) pairs, so no lookup by position is needed
for i, number in enumerate(a_short_list):
    print(f'number {i} is {number}')

number 0 is 233
number 1 is 462
number 2 is 193
number 3 is 222
number 4 is 145
CPU times: user 72 µs, sys: 15 µs, total: 87 µs
Wall time: 90.1 µs


In [73]:
%%time 
# Clumsy: only the value is needed, yet enumerate() still produces an index
# that is immediately thrown away via `_`
for _, number in enumerate(a_short_list):
    print(f'{number}')

233
462
193
222
145
CPU times: user 642 µs, sys: 479 µs, total: 1.12 ms
Wall time: 816 µs


In [74]:
%%time 
# Good: only the value is needed, so iterate over the list directly
for number in a_short_list:
    print(f'{number}')

233
462
193
222
145
CPU times: user 106 µs, sys: 66 µs, total: 172 µs
Wall time: 137 µs


### Use `zip()` for packing and unpacking multiple iterables

In [78]:
# Two equal-length lists of one-character strings, to be paired up position by position
list1 = [
    'a', 'b', 'c', 'd', 'e',
]
list2 = [
    '1', '2', '3', '4', '5',
]

In [79]:
# Pack the two lists into a list of tuples, pairing elements by position.
# list() consumes the zip iterator directly, so no pass-through comprehension is needed.
pairs_list = list(zip(list1, list2))

In [87]:
# Display the packed pairs
pairs_list

[('a', '1'), ('b', '2'), ('c', '3'), ('d', '4'), ('e', '5')]

In [86]:
%%time
# Unpack the list of tuples with zip(*...): the star passes every pair as a separate
# argument and zip regroups them by position. letters1 and numbers1 are tuples.
letters1, numbers1 = zip(*pairs_list)

CPU times: user 5 µs, sys: 1e+03 ns, total: 6 µs
Wall time: 6.91 µs


In [84]:
%%time
# Unpack the list of tuples by hand: one pass over the pairs per position.
# letters2 and numbers2 are lists.
letters2 = [pair[0] for pair in pairs_list]
numbers2 = [pair[1] for pair in pairs_list]

CPU times: user 5 µs, sys: 1e+03 ns, total: 6 µs
Wall time: 7.87 µs


### Combine `set()` and `in`

In [90]:
# Hand-written membership test (deliberately an explicit loop, for comparison with `in`)
def check_membership(n, container=None):
    '''Return True if `n` equals some element of `container`, otherwise False.

    n: value to look for; elements are compared with ==.
    container: iterable to scan. When omitted, the notebook-level
        another_long_list is used, so check_membership(n) works as before.
    '''
    if container is None:
        container = another_long_list  # looked up at call time
    for element in container:
        if element == n:
            return True  # stop at the first match
    return False

In [96]:
%%time
# Check whether 900 is in the list using the hand-written loop.
# The list only holds values from 0 to 500, so the loop scans every element.
check_membership(900)

CPU times: user 29.7 ms, sys: 847 µs, total: 30.5 ms
Wall time: 30.2 ms


False

In [97]:
%%time
# Check whether 900 is in the list using the built-in `in` operator
900 in another_long_list

CPU times: user 10.2 ms, sys: 79 µs, total: 10.3 ms
Wall time: 10.3 ms


False

In [101]:
%%time
# Build a set from the list once; this also removes the duplicates
check_list = set(another_long_list)

CPU times: user 19.8 ms, sys: 204 µs, total: 20 ms
Wall time: 20 ms


In [100]:
%%time 
# Then run the membership test against the set (check_list is a set, not a list)
900 in check_list

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.25 µs


False

### Check if a variable is true

In [107]:
# Stand-in for a value returned by some function: a non-empty string
string_returned_from_function = 'Hello World'

In [108]:
%%time
# Pitfall: this compares the value with the bool True. A non-empty string is
# not equal to True, so the branch is NOT taken here.
if string_returned_from_function == True:
    pass

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 5.01 µs


In [109]:
%%time
# Pitfall: identity test against the True singleton. Only the bool True itself
# passes, so the branch is NOT taken for a string.
if string_returned_from_function is True:
    pass

CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 4.05 µs


In [110]:
%%time
# Preferred: rely on truthiness. Any non-empty string is truthy, so the branch is taken.
if string_returned_from_function:
    pass

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 4.05 µs


### Count unique values using `Counter()`

In [111]:
from collections import Counter

In [116]:
%%time
# Clumsy: count occurrences by hand in a plain dict.
# The same name (num_counts1) is used for creation, lookup and update, so the
# cell runs on a fresh kernel.
num_counts1 = {}
for num in a_long_list:
    if num in num_counts1:
        num_counts1[num] += 1
    else:
        num_counts1[num] = 1

CPU times: user 448 ms, sys: 1.77 ms, total: 450 ms
Wall time: 450 ms


In [117]:
%%time
# Good: Counter tallies every element of the list in a single call
num_counts2 = Counter(a_long_list)

CPU times: user 40.7 ms, sys: 329 µs, total: 41 ms
Wall time: 41.2 ms


In [115]:
# print the 10 most common numbers and their counts
# (most_common() is a Counter method, so iterate over the Counter num_counts2)
for number, count in num_counts2.most_common(10):
    print(number, count)

29 19831
47 19811
7 19800
36 19794
14 19761
39 19748
32 19747
16 19737
34 19729
33 19729


### Put the `for` loop inside the function

In [135]:
# define a function that computes the cube of a single number
def compute_cubic1(number):
    '''Calculate the cube (third power) of a number'''
    return number**3

In [136]:
%%time
# The loop lives outside the function: compute_cubic1 is called once per element
new_list_cubic1 = [compute_cubic1(number) for number in a_long_list]

CPU times: user 335 ms, sys: 14.3 ms, total: 349 ms
Wall time: 354 ms


In [137]:
# define a function that computes the cube of every number in a list
def compute_cubic2(numbers=None):
    '''Calculate the cube (third power) of each number in a list

    numbers: iterable of numbers to cube. When omitted, the notebook-level
        a_long_list is used, so compute_cubic2() works as before.
    Returns a new list holding the cubes in input order.
    '''
    if numbers is None:
        numbers = a_long_list  # looked up at call time
    return [number**3 for number in numbers]

In [138]:
%%time
# The loop lives inside the function: a single call processes the whole list
new_list_cubic2 = compute_cubic2()

CPU times: user 261 ms, sys: 15.7 ms, total: 277 ms
Wall time: 277 ms
