## 字典和集合基础

### 字典和集合的创建

In [1]:
d1 = {'name': 'json', 'age': 20, 'gender': 'male'}

In [2]:
d2 = dict({'name': 'json', 'age': 20, 'gender': 'male'})

In [3]:
d3 = dict([('name', 'json'), ('age', 20), ('gender', 'male')])

In [4]:
d4 = dict(name='json', age=20, gender='male')

In [5]:
d1 == d2 == d3 == d4

True

In [6]:
s1 = {1, 2, 3}

In [7]:
s2 = set([1, 2, 3])

In [8]:
s1 == s2

True

### Python中字典和集合，无论是键还是值，都可以是混合类型

In [9]:
s = {1, 'hello', 5.0}

## 元素访问，字典可以直接索引键，如果不存在，就会抛出异常

In [10]:
d = {'name': 'jason', 'age': 20}

In [11]:
d['name']

'jason'

In [12]:
d['location']

KeyError: 'location'

### 也可以使用get(key, default)函数来索引，如果键不存在，调用get()函数可以返回一个默认值

In [13]:
d = {'name': 'jason', 'age': 20}

In [14]:
d.get('name')

'jason'

In [16]:
d.get('location', 'null')

'null'

## 集合并不支持索引操作，因为集合本质上是一个哈希表，和列表不一样

In [17]:
s = {1, 2, 3}

In [18]:
dir(s)

['__and__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__iand__',
 '__init__',
 '__init_subclass__',
 '__ior__',
 '__isub__',
 '__iter__',
 '__ixor__',
 '__le__',
 '__len__',
 '__lt__',
 '__ne__',
 '__new__',
 '__or__',
 '__rand__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__ror__',
 '__rsub__',
 '__rxor__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__sub__',
 '__subclasshook__',
 '__xor__',
 'add',
 'clear',
 'copy',
 'difference',
 'difference_update',
 'discard',
 'intersection',
 'intersection_update',
 'isdisjoint',
 'issubset',
 'issuperset',
 'pop',
 'remove',
 'symmetric_difference',
 'symmetric_difference_update',
 'union',
 'update']

In [19]:
s[0]

TypeError: 'set' object is not subscriptable

## 判断一个元素在不在字典或集合内，我们可以用 value in dict/set 来判断（注意：对于字典，in 判断的是键是否存在，而不是值）

In [20]:
s = {1, 2, 3}

In [21]:
1 in s

True

In [22]:
10 in s

False

In [23]:
d = {'name': 'jason', 'age': 20}

In [24]:
'name' in d

True

In [25]:
'location' in d

False

## 字典和集合也同样支持增加/删除/更新等操作

In [26]:
d = {'name': 'jason', 'age': 20}

In [27]:
d['gender'] = 'male' # 增加元素对'gender': 'male'

In [28]:
d['dob'] = '1999-02-01' # 增加元素对'dob': '1999-02-01'

In [29]:
d

{'name': 'jason', 'age': 20, 'gender': 'male', 'dob': '1999-02-01'}

In [30]:
d['dob'] = '1998-01-01' # 更新键'dob'对应的值

In [31]:
d.pop('dob') # 删除键为'dob'的元素对

'1998-01-01'

In [32]:
d

{'name': 'jason', 'age': 20, 'gender': 'male'}

In [33]:
s = {1, 2, 3}

In [34]:
s.add(4) # 增加元素4到集合

In [35]:
s

{1, 2, 3, 4}

In [36]:
s.remove(4) # 从集合中删除元素4

In [37]:
s

{1, 2, 3}

## 对于字典，我们通常会根据键或值，进行升序或降序排序

In [38]:
d = {'b': 1, 'a': 2, 'c': 10}

In [39]:
d_sorted_by_key = sorted(d.items(), key=lambda pair: pair[0])  # (key, value) pairs in ascending key order

In [46]:
d_sorted_by_key_reverse = sorted(d.items(), key=lambda pair: pair[0], reverse=True)  # (key, value) pairs in descending key order

In [40]:
d_sorted_by_value = sorted(d.items(), key=lambda pair: pair[1])  # (key, value) pairs in ascending value order

In [47]:
d_sorted_by_value_reverse = sorted(d.items(), key=lambda pair: pair[1], reverse=True)  # (key, value) pairs in descending value order

In [41]:
d_sorted_by_key

[('a', 2), ('b', 1), ('c', 10)]

In [48]:
d_sorted_by_key_reverse

[('c', 10), ('b', 1), ('a', 2)]

In [42]:
d_sorted_by_value

[('b', 1), ('a', 2), ('c', 10)]

In [49]:
d_sorted_by_value_reverse

[('c', 10), ('a', 2), ('b', 1)]

## 对于集合，直接调用sorted(set)即可

In [43]:
s = {3, 4, 2, 1}

In [44]:
sorted(s) # 对集合的元素进行升序排序

[1, 2, 3, 4]

In [45]:
help(sorted)

Help on built-in function sorted in module builtins:

sorted(iterable, /, *, key=None, reverse=False)
    Return a new list containing all items from the iterable in ascending order.
    
    A custom key function can be supplied to customize the sort order, and the
    reverse flag can be set to request the result in descending order.



## 字典和集合性能

In [50]:
def find_product_price(products, product_id):
    """Look up a product's price in a sequence of (id, price) pairs.

    The pairs are scanned in order and the price from the first pair whose
    id equals ``product_id`` is returned. Returns None when no pair matches.
    """
    # Lazy generator: scanning stops at the first match, like an early return.
    matching_prices = (cost for pid, cost in products if pid == product_id)
    return next(matching_prices, None)

products = [
    (143121312, 100),
    (432314553, 30),
    (32421912367, 150)
]

print('The price of product 432314553 is {}'.format(find_product_price(products, 432314553)))

The price of product 432314553 is 30


In [52]:
products = {
    143121312: 100,
    432314553: 30,
    32421912367: 150
}

print('The price of product 432314553 is {}'.format(products[432314553]))

The price of product 432314553 is 30


In [58]:
# List-based version.
# The outer loop (A) wraps a linear membership scan of a list (B),
# so the overall time complexity is O(n^2).
def find_unique_price_using_list(products):
    """Count the distinct prices among (product_id, price) pairs using a list."""
    seen_prices = []
    for _pid, amount in products:  # A
        if amount in seen_prices:  # B
            continue
        seen_prices.append(amount)
    return len(seen_prices)

In [61]:
products = [
    (143121312, 100),
    (432314553, 30),
    (32421912367, 150),
    (937153201, 30)
]
print("number of unique price is: {}".format(find_unique_price_using_list(products)))

number of unique price is: 3


In [62]:
# Set-based version.
# A set is a highly optimized hash table, so the total time complexity is O(n).
def find_unique_price_using_set(products):
    """Count the distinct prices among (product_id, price) pairs using a set."""
    distinct_prices = {amount for _pid, amount in products}
    return len(distinct_prices)

In [63]:
products = [
    (143121312, 100),
    (432314553, 30),
    (32421912367, 150),
    (937153201, 30)
]
print("number of unique price is: {}".format(find_unique_price_using_set(products)))

number of unique price is: 3


In [65]:
import time
# Benchmark data: 100,000 (product_id, price) pairs with ids 0..99999 and
# prices 200000..299999, so every price is unique.
# The intermediate names deliberately avoid `id`, which would shadow the
# built-in id() for every later cell run in the same kernel.
product_ids = range(100000)
prices = range(200000, 300000)
products = list(zip(product_ids, prices))

In [66]:
# Measure how long the list-based version takes.
# perf_counter() is read immediately before and after the call, so the
# difference printed below is the elapsed time of that single call.
start_using_list = time.perf_counter()
find_unique_price_using_list(products)  # return value is discarded; only the duration matters
end_using_list = time.perf_counter()
print("time elapse using list: {}".format(end_using_list - start_using_list))

time elapse using list: 26.39814709700113


In [67]:
# Measure how long the set-based version takes.
# perf_counter() is read immediately before and after the call, so the
# difference printed below is the elapsed time of that single call.
start_using_set = time.perf_counter()
find_unique_price_using_set(products)  # return value is discarded; only the duration matters
end_using_set = time.perf_counter()
print("time elapse using set: {}".format(end_using_set - start_using_set))

time elapse using set: 0.01565658599974995


In [56]:
help(time.perf_counter)

Help on built-in function perf_counter in module time:

perf_counter(...)
    perf_counter() -> float
    
    Performance counter for benchmarking.



## 字典和集合的工作原理

## 插入操作

# 思考题

## Option A 更高效

In [68]:
# Option A
# d = {'name': 'jason', 'age': 20, 'gender': 'male'}
# Option B
# d = dict({'name': 'jason', 'age': 20, 'gender': 'male'})

In [69]:
import timeit

In [74]:
timeit.timeit("d = {'name': 'jason', 'age': 20, 'gender': 'male'}")

0.0744255460012937

In [75]:
timeit.timeit("d = dict({'name': 'jason', 'age': 20, 'gender': 'male'})")

0.23724565600059577

## 字典的键不能是一个列表

In [76]:
d = {'name': 'jason', ['education']: ['Tsinghua University', 'Stanford University']}

TypeError: unhashable type: 'list'

### 字典的键必须是可哈希（hashable）的对象；列表是可变类型，不可哈希，因此不能作为键