# 元组
固定长度、不可改变的Python序列对象

## 基本操作

In [1]:
tup=4,5,6
tup

(4, 5, 6)

In [3]:
nested_tup=(4,5,6),(7,8)
nested_tup

((4, 5, 6), (7, 8))

将任意序列转换为元组

In [6]:
a = [1, 2, 3, 4]
print(tuple(a))
b = 'string'
print(tuple(b))

(1, 2, 3, 4)
('s', 't', 'r', 'i', 'n', 'g')


元组定义后,不能再改变各个位置上存放的对象(即不能对元素重新赋值)  
但如果元组中的某个元素本身是可变对象(例如list类型),则可以原地修改它的内容  
事实上,元组元素不可改变的本质是每个位置保存的对象引用(可以理解为内存地址)不可改变  
而给变量重新赋值,实际上是让变量指向另一个对象(另一块内存空间)  
下面的例子可以看出:list原地修改内容后,其id(内存地址)不变,因此并没有违反元组的不可变性

In [146]:
tup = ('foo', [1, 2, 3], True)
print(id(tup[1]))
tup[1].append(4)
print(id(tup[1]))
print(tup)

1316112565056
1316112565056
('foo', [1, 2, 3, 4], True)


In [9]:
tup[2]=False

TypeError: 'tuple' object does not support item assignment

可以用加号将元组串联

In [13]:
 (1,None,'foo')+(6,0)+('bar',) # 注意元组只含单个元素时的特殊写法

(1, None, 'foo', 6, 0, 'bar')

## 拆分元组

将元组赋值给类似元组的变量,python会试图拆分等号右边的值

In [17]:
tup=(4,5,6)
a,b,c=tup
print(a)
print(b)
print(c)

4
5
6


即使是嵌套的元组(元组中含有元组)也可以被拆分

In [21]:
tup=4,5,(6,7)
a,b,(c,d)=tup
print(a)
print(b)
print(c)
print(d)

4
5
6
7


因此在Python中,交换两个变量的值更为简单,无需借助临时变量,写为

In [24]:
a,b=1,2
print(a)
print(b)
b,a=a,b
print(a)
print(b)

1
2
2
1


变量拆分经常用来迭代元组或列表序列

In [25]:
seq=[(1,2,3),(4,5,6),(7,8,9)]
for a,b,c in seq:
    print('a={0},b={1},c={2}'.format(a,b,c))

a=1,b=2,c=3
a=4,b=5,c=6
a=7,b=8,c=9


高级的元组拆分: 只摘取元组的头几个元素

In [34]:
value=1,2,3,4,5
a,b,*rest=value
print(a,b)

1 2


rest名字不重要,也可以用下划线代替

In [35]:
c,d,e,*_=value
print(c,d,e)

1 2 3


也可以只摘取末尾几个元素

In [30]:
*rest,a,b=value
a,b

(4, 5)

# 列表

list()函数经常用来将迭代器或生成器实体化,即把它们生成的对象转换为list类型

In [40]:
gen=range(10)
print(gen)
gen=list(gen)
print(gen)

range(0, 10)
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


增删改查

In [41]:
b_list=['foo','peekaboo','baz']
b_list.append('dwarf')
b_list

['foo', 'peekaboo', 'baz', 'dwarf']

In [42]:
b_list.insert(1,'red')
b_list

['foo', 'red', 'peekaboo', 'baz', 'dwarf']

In [43]:
b_list.pop(2)
b_list

['foo', 'red', 'baz', 'dwarf']

remove函数只会删除第一个匹配的值

In [46]:
b_list.append('foo')
print(b_list)
b_list.remove('foo')
print(b_list)

['foo', 'red', 'baz', 'dwarf', 'foo']
['red', 'baz', 'dwarf', 'foo']


## 串联和组合列表

In [47]:
[4, None, 'foo'] + [7, 8, (2, 3)]

[4, None, 'foo', 7, 8, (2, 3)]

In [48]:
x=[4, None, 'foo']
x.extend([7,8,(2,3)])
x

[4, None, 'foo', 7, 8, (2, 3)]

用加法串联列表时必须新建一个列表并复制所有元素,计算量较大;向已有的大列表追加元素时,使用extend更为可取

In [57]:
# Create the accumulator once, before the loop. Re-creating it inside the
# loop would throw away every sub-list except the one from the last iteration.
list_of_lists=[]
for i in range(10):
    lists=list(range(i))
    list_of_lists.append(lists)
print(list_of_lists)

[[], [0], [0, 1], [0, 1, 2], [0, 1, 2, 3], [0, 1, 2, 3, 4], [0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5, 6], [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7, 8]]


In [56]:
def time_consume_extend():
    """Flatten ten sub-lists into one list with ``list.extend`` and print it.

    The sub-lists are ``list(range(0))`` through ``list(range(9))``.
    Takes no arguments and returns ``None``; the flattened list is only
    written to standard output.
    """
    # The accumulator must be created once, outside the loop; resetting it on
    # every iteration would leave only the last sub-list to be flattened.
    list_of_lists=[]
    for i in range(10):
        list_of_lists.append(list(range(i)))
    # extend() grows `everything` in place instead of building a new list
    # for each chunk the way `everything + chunk` would.
    everything = []
    for chunk in list_of_lists:
        everything.extend(chunk)
    print(everything)

time_consume_extend()

[0, 0, 1, 0, 1, 2, 0, 1, 2, 3, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 8]


# 排序

In [58]:
a=[9,3,4,1,6]
a.sort()
a

[1, 3, 4, 6, 9]

利用key参数指定二级排序键(一个用于生成排序依据的函数)进行排序操作

In [59]:
b=['saw', 'small', 'He', 'foxes', 'six']
b.sort(key=len)
b

['He', 'saw', 'six', 'small', 'foxes']

bisect支持二分查找  
bisect.bisect可以找到插入值后仍然保证排序的位置  
bisect.insort向适当位置插入值  

In [66]:
import bisect
c=[1,2,2,2,3,4,5,5,6,7,9]
bisect.bisect(c,2)
bisect.insort(c,6)
c

[1, 2, 2, 2, 3, 4, 5, 5, 6, 6, 7, 9]

## 切片

基本形式:[start:stop]

In [67]:
seq = [7, 2, 3, 7, 5, 6, 0, 1]
seq[1:5]

[2, 3, 7, 5]

切片能被赋值

In [68]:
seq[3:4] = [6, 3]
seq

[7, 2, 3, 6, 3, 5, 6, 0, 1]

In [69]:
print(seq[:5])
print(seq[3:])

[7, 2, 3, 6, 3]
[6, 3, 5, 6, 0, 1]


In [72]:
print(seq)
print(seq[-4:])
print(seq[-6:-2])

[7, 2, 3, 6, 3, 5, 6, 0, 1]
[5, 6, 0, 1]
[6, 3, 5, 6]


在第二个冒号后可以指定步长(step),例如步长为2时可以隔一个取一个元素

In [75]:
print(seq)
seq[::2]

[7, 2, 3, 6, 3, 5, 6, 0, 1]


[7, 3, 3, 6, 1]

利用这个特性,将步长设为-1可以将列表或元组颠倒输出

In [77]:
seq[::-1]

[1, 0, 6, 5, 3, 6, 3, 2, 7]


## enumerate函数

内建函数enumerate()会返回(i,value)元组序列

In [79]:
some_list = ['foo', 'bar', 'baz']
mapping = {}
for i, value in enumerate(some_list):
    mapping[value] = i
mapping

{'foo': 0, 'bar': 1, 'baz': 2}

## sorted函数

从任意序列的元素返回一个新的排好的列表  
而sort方法会原地排序,直接改变列表本身的值

In [89]:
seq = [7, 1, 2, 6, 0, 3, 2]
print(seq)
print(sorted(seq))
print(seq)
seq.sort()
print(seq)

[7, 1, 2, 6, 0, 3, 2]
[0, 1, 2, 2, 3, 6, 7]
[7, 1, 2, 6, 0, 3, 2]
[0, 1, 2, 2, 3, 6, 7]


## zip函数

将任意多个列表,元组或其他序列打包为一个元组列表  
元素个数取决于最短的列表元素的个数

In [97]:
seq1 = ['foo', 'bar', 'baz']
seq2 = ['one', 'two', 'three', 'four']
zipped = zip(seq1, seq2)
list(zipped)

[('foo', 'one'), ('bar', 'two'), ('baz', 'three')]

和enumerate()函数结合可以实现同时迭代多个序列

In [101]:
for i, (a, b) in enumerate(zip(seq1, seq2)):
    print('{0}:{1},{2}'.format(i, a, b))

0:foo,one
1:bar,two
2:baz,three


利用zip来解压序列

In [107]:
pitchers = [('Nolan', 'Ryan'), ('Roger', 'Clemens'), ('Schilling', 'Curt')]
first_names, last_names = zip(*pitchers)
print(first_names)
print(last_names)

('Nolan', 'Roger', 'Schilling')
('Ryan', 'Clemens', 'Curt')


## reversed函数

从后向前迭代序列  
注意reversed返回的是一个迭代器,只有实体化(例如使用list()或for循环)之后才能得到反转的序列

In [108]:
list(reversed(range(10)))

[9, 8, 7, 6, 5, 4, 3, 2, 1, 0]

# 字典

为键值对的大小可变集合

In [111]:
empty_dict = {}
empty_dict

{}

In [112]:
d1={'a':'some value','b':'[1,2,3,4]'}
d1

{'a': 'some value', 'b': '[1,2,3,4]'}

## 增

In [113]:
d1[7]='an integer'
d1

{'a': 'some value', 'b': '[1,2,3,4]', 7: 'an integer'}

## 查

In [114]:
'b' in d1

True

## 删

del和pop函数

In [120]:
d1[5] = 'some value'
d1['dummy'] = 'another value'
print(d1)
del d1[5]
print(d1)
ret = d1.pop('dummy')
print(ret)
print(d1)

{'a': 'some value', 'b': '[1,2,3,4]', 7: 'an integer', 5: 'some value', 'dummy': 'another value'}
{'a': 'some value', 'b': '[1,2,3,4]', 7: 'an integer', 'dummy': 'another value'}
another value
{'a': 'some value', 'b': '[1,2,3,4]', 7: 'an integer'}


## 其他方法

keys()和values()

In [121]:
list(d1.keys())

['a', 'b', 7]

In [122]:
list(d1.values())

['some value', '[1,2,3,4]', 'an integer']

update()将字典与字典融合

In [124]:
print(d1)
d1.update({'b': 'foo', 'c': 12})
d1

{'a': 'some value', 'b': '[1,2,3,4]', 7: 'an integer'}


{'a': 'some value', 'b': 'foo', 7: 'an integer', 'c': 12}

## 用序列创建字典

In [125]:
key_list = d1.keys()
value_list = d1.values()
mapping = {}
for key, value in zip(key_list, value_list):
    mapping[key] = value
mapping

{'a': 'some value', 'b': 'foo', 7: 'an integer', 'c': 12}

字典实际上是一个二元组的集合  
因此它可以接受二元组的列表

In [126]:
mapping = dict(zip(range(5), reversed(range(5))))
mapping

{0: 4, 1: 3, 2: 2, 3: 1, 4: 0}

## 默认值

In [129]:
print(d1)
value = d1.get(2, 'default_value')  # 搜索不到键为2的值则返回默认值
value

{'a': 'some value', 'b': 'foo', 7: 'an integer', 'c': 12}


'default_value'

通过首字母将单词分类

In [133]:
words = ['apple', 'bat', 'bar', 'atom', 'book']
by_letter = {}
for word in words:
    letter = word[0]
    if letter not in by_letter:
        by_letter[letter] = [word]
        print(by_letter)
    else:
        by_letter[letter].append(word)
        print(by_letter)
by_letter

{'a': ['apple']}
{'a': ['apple'], 'b': ['bat']}
{'a': ['apple'], 'b': ['bat', 'bar']}
{'a': ['apple', 'atom'], 'b': ['bat', 'bar']}
{'a': ['apple', 'atom'], 'b': ['bat', 'bar', 'book']}


{'a': ['apple', 'atom'], 'b': ['bat', 'bar', 'book']}

In [140]:
words = ['apple', 'bat', 'bar', 'atom', 'book']
by_letter={}
for word in words:
    letter=word[0]
    by_letter.setdefault(letter,[]).append(word)
by_letter

{'a': ['apple', 'atom'], 'b': ['bat', 'bar', 'book']}

In [144]:
from collections import defaultdict
by_letter = defaultdict(list)
for word in words:
    by_letter[word[0]].append(word)
by_letter

defaultdict(list, {'a': ['apple', 'atom'], 'b': ['bat', 'bar', 'book']})

## 有效的键类型

字典的键通常为不可变类型(值则可以是任意对象)  
更准确地说,键必须具有可哈希性,而不可变类型对象通常是可哈希的(元组要求其所有元素都可哈希)  
可以通过hash()函数来检测对象是否能够被用来当字典的键

In [148]:
hash('string')

-1417954228885140279

列表可变,不可hash

In [149]:
hash([1,2,3])

TypeError: unhashable type: 'list'

若要将列表当作键,就必须先将其转化为元组

In [152]:
d={}
d[tuple([1,2,3])]=5
d

{(1, 2, 3): 5}

# 集合

无序的**不可重复**的元素的集合  
可以看作是只有键没有值的字典  

In [153]:
set({1,3,5,2,2})

{1, 2, 3, 5}

In [154]:
{2,2,2,5,6,7,7}

{2, 5, 6, 7}

## 集合的并,交

In [155]:
a={1,2,3,4,5}
b={3,4,5,6,7,8}

In [156]:
a.union(b)

{1, 2, 3, 4, 5, 6, 7, 8}

In [157]:
a|b

{1, 2, 3, 4, 5, 6, 7, 8}

In [158]:
a.intersection(b)

{3, 4, 5}

In [159]:
a&b

{3, 4, 5}

## 检测子集还是超集(父集)

In [160]:
a_set={1,2,3,4,5}

In [161]:
{1,2,3}.issubset(a_set)

True

In [162]:
a_set.issuperset(a_set)

True

In [163]:
{1,2,3,4,5,6}.issuperset(a_set)

True

In [165]:
{1,2,3}=={3,1,2}

True

# 列表,集合和字典推导式

[expr for val in collection if condition]

In [170]:
strings=['a','as','bat','car','dove','python']

In [173]:
[x.upper() for x in strings if len(x) > 2]

['BAT', 'CAR', 'DOVE', 'PYTHON']

dict_comp={key_expr: value_expr for value in collection if condition}

set_comp={expr for value in collection if condition}

In [176]:
unique_lengths={len(x) for x in strings}
unique_lengths

{1, 2, 3, 4, 6}

In [179]:
set(map(len,strings))

{1, 2, 3, 4, 6}

In [184]:
loc_mapping={val:index for index, val in enumerate(strings)}
loc_mapping

{'a': 0, 'as': 1, 'bat': 2, 'car': 3, 'dove': 4, 'python': 5}

# 嵌套列表推导式

In [2]:
all_data = [['John', 'Emily', 'Michael', 'Mary', 'Steven'],['Maria', 'Juan', 'Javier', 'Natalia', 'Pilar']]
all_data

[['John', 'Emily', 'Michael', 'Mary', 'Steven'],
 ['Maria', 'Juan', 'Javier', 'Natalia', 'Pilar']]

In [3]:
names_of_interest=[]
for names in all_data:
    enough_es = [name for name in names if name.count('e')>= 2]
    names_of_interest.extend(enough_es)

In [4]:
names_of_interest

['Steven']

双重嵌套列表推导式一行写法,过滤条件依然放在最后

In [5]:
result=[name for names in all_data for name in names if name.count('e')>=2]

In [6]:
result

['Steven']

注意循环顺序

In [197]:
some_tuples = [(3,2,1), (6,5,4), (7, 8, 9)]
flattend=[x for tup in some_tuples for x in tup]

In [198]:
flattend

[3, 2, 1, 6, 5, 4, 7, 8, 9]

大于三层嵌套时应当做适当分割提高代码可读性

In [200]:
[[x for x in tup] for tup in some_tuples]

[[3, 2, 1], [6, 5, 4], [7, 8, 9]]