第十章序列的修改、散列和切片


10.1 Vector类：用户定义的序列类型

10.2 Vector类第1版：与Vector2d兼容

In [1]:
from array import array
array('d',[1,2,3])

array('d', [1.0, 2.0, 3.0])

In [9]:
import reprlib
reprlib.repr(array('d',range(10)))

"array('d', [0.0, 1.0, 2.0, 3.0, 4.0, ...])"

In [2]:
from array import array
import reprlib
import math

# A sequence type's constructor should preferably take an iterable as its argument, because all built-in sequence types do
class Vector:
    """N-dimensional vector whose components are stored in an ``array``."""

    typecode = 'd'  # array typecode for storage; also the first byte of bytes(v)

    def __init__(self, components):
        """Store the numbers produced by the iterable ``components``."""
        self._component = array(self.typecode, components)

    def __iter__(self):
        """Iterate over the components."""
        return iter(self._component)

    def __repr__(self):
        """Return ``Vector([...])``, abbreviated with ``...`` for long vectors."""
        # reprlib.repr gives a length-limited string such as "array('d', [0.0, 1.0, ...])"
        components = reprlib.repr(self._component)
        start = components.find('[')
        if start < 0:
            # no bracketed list in the representation (empty vector)
            return 'Vector([])'
        # keep only the list part: drop the leading "array('d', " and the trailing ")"
        components = components[start:-1]
        return 'Vector({})'.format(components)

    def __str__(self):
        """Return the components formatted like a tuple."""
        return str(tuple(self))

    def __bytes__(self):
        """Return the typecode as one byte followed by the raw component bytes."""
        # bytes([n]) builds a one-byte object from the integer n; subscripting
        # the type itself (bytes[n]) raises TypeError.
        return bytes([ord(self.typecode)]) + bytes(self._component)

    def __eq__(self, other):
        """Compare by converting both operands to tuples."""
        return tuple(self) == tuple(other)

    def __abs__(self):
        """Return the Euclidean norm."""
        return math.sqrt(sum(x * x for x in self))

    def __bool__(self):
        """Return False only when the norm is zero."""
        return bool(abs(self))

    @classmethod
    def frombytes(cls, octets):
        """Rebuild a vector from bytes laid out as ``__bytes__`` produces them."""
        typecode = chr(octets[0])  # the first byte holds the typecode
        memv = memoryview(octets[1:]).cast(typecode)
        # the constructor takes an iterable, so the memoryview is passed as is (no * unpacking)
        return cls(memv)

In [3]:
a=Vector([3.1,4.2])

In [4]:
a

Vector([3.1, 4.2])

In [5]:
[x for x in a]

[3.1, 4.2]

In [6]:
abs(a)

5.220153254455275

In [7]:
Vector((3,4,5))

Vector([3.0, 4.0, 5.0])

In [8]:
Vector(range(10))

Vector([0.0, 1.0, 2.0, 3.0, 4.0, ...])

10.3协议和鸭子类型

![chapter10-3](chapter10-3.png)

In [10]:
import collections

# A playing card is a plain (rank, suit) record.
Card = collections.namedtuple('Card', ['rank', 'suit'])


class FrenchDeck:
    """Deck of cards that supports ``len()`` and ``[]`` through the sequence protocol."""

    # '2'..'10' followed by the face cards and the ace
    ranks = list(map(str, range(2, 11))) + list('JQKA')
    suits = 'spades diamonds clubs hearts'.split()

    def __init__(self):
        # Build the deck suit by suit, each suit running through every rank.
        deck = []
        for suit in self.suits:
            for rank in self.ranks:
                deck.append(Card(rank, suit))
        self._card = deck

    def __len__(self):
        """Return the number of cards in the deck."""
        return len(self._card)

    def __getitem__(self, position):
        """Delegate indexing and slicing to the underlying list of cards."""
        return self._card[position]

10.4 Vector类第二版：可切片的序列

In [11]:
#只需添加__len__和__getitem__
from array import array
import reprlib
import math

# A sequence type's constructor should preferably take an iterable as its argument, because all built-in sequence types do
class Vector:
    """N-dimensional vector with basic sequence support (``len`` and ``[]``)."""

    typecode = 'd'  # array typecode for storage; also the first byte of bytes(v)

    def __init__(self, components):
        """Store the numbers produced by the iterable ``components``."""
        self._component = array(self.typecode, components)

    def __iter__(self):
        """Iterate over the components."""
        return iter(self._component)

    def __repr__(self):
        """Return ``Vector([...])``, abbreviated with ``...`` for long vectors."""
        # reprlib.repr gives a length-limited string such as "array('d', [0.0, 1.0, ...])"
        components = reprlib.repr(self._component)
        start = components.find('[')
        if start < 0:
            # no bracketed list in the representation (empty vector)
            return 'Vector([])'
        # keep only the list part: drop the leading "array('d', " and the trailing ")"
        components = components[start:-1]
        return 'Vector({})'.format(components)

    def __str__(self):
        """Return the components formatted like a tuple."""
        return str(tuple(self))

    def __bytes__(self):
        """Return the typecode as one byte followed by the raw component bytes."""
        # bytes([n]) builds a one-byte object from the integer n; subscripting
        # the type itself (bytes[n]) raises TypeError.
        return bytes([ord(self.typecode)]) + bytes(self._component)

    def __eq__(self, other):
        """Compare by converting both operands to tuples."""
        return tuple(self) == tuple(other)

    def __abs__(self):
        """Return the Euclidean norm."""
        return math.sqrt(sum(x * x for x in self))

    def __bool__(self):
        """Return False only when the norm is zero."""
        return bool(abs(self))

    @classmethod
    def frombytes(cls, octets):
        """Rebuild a vector from bytes laid out as ``__bytes__`` produces them."""
        typecode = chr(octets[0])  # the first byte holds the typecode
        memv = memoryview(octets[1:]).cast(typecode)
        # the constructor takes an iterable, so the memoryview is passed as is (no * unpacking)
        return cls(memv)

    def __len__(self):
        """Return the number of components."""
        return len(self._component)

    def __getitem__(self, position):
        """Delegate indexing to the underlying array.

        In this version a slice therefore yields a plain ``array``, not a ``Vector``.
        """
        return self._component[position]

In [12]:
v1=Vector([3,4,5])

In [13]:
len(v1)

3

In [14]:
v1[-1]

5.0

In [15]:
v7=Vector(range(7))

In [16]:
v7[1:4]

array('d', [1.0, 2.0, 3.0])

可以看到，现在连切片都支持了，不过尚不完美。如果Vector实例的切片也是Vector实例，而不是数组就更好了。

10.4.1切片原理

In [18]:
class MySeq:
    """Minimal class for inspecting what the ``[]`` operator passes to ``__getitem__``."""
    def __getitem__(self, item):
        # in this example __getitem__ simply returns whatever it was given
        return item
s=MySeq()
s[1]


1

In [20]:
s[1:4:2]

slice(1, 4, 2)

In [21]:
s[1:4:2,9] #神奇的事发生了，如果[]中有逗号，那么__getitem__收到的是元组

(slice(1, 4, 2), 9)

In [22]:
s[1:3:2,7:9]#元组中甚至可以有多个切片对象

(slice(1, 3, 2), slice(7, 9, None))

In [23]:
#现在来看看slice本身
#查看slice类的属性
slice

slice

In [24]:
dir(slice)

['__class__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__le__',
 '__lt__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'indices',
 'start',
 'step',
 'stop']

In [25]:
help(slice.indices)

Help on method_descriptor:

indices(...)
    S.indices(len) -> (start, stop, stride)
    
    Assuming a sequence of length len, calculate the start and stop
    indices, and the stride length of the extended slice described by
    S. Out of bounds indices are clipped in a manner consistent with the
    handling of normal slices.



In [27]:
slice(None,10,2).indices(5) #'ABCDE'[:10:2]等同于'ABCDE'[0:5:2]

(0, 5, 2)

In [30]:
slice(-3,None,None).indices(5)#'ABCDE'[-3:]等同于'ABCDE'[2:5:1]

(2, 5, 1)

10.4.2能处理切片的__getitem__方法

In [31]:
#重写__getitem__方法
#只需添加__len__和__getitem__
from array import array
import reprlib
import math
import numbers

# A sequence type's constructor should preferably take an iterable as its argument, because all built-in sequence types do
class Vector:
    """N-dimensional vector whose slices are themselves ``Vector`` instances."""

    typecode = 'd'  # array typecode for storage; also the first byte of bytes(v)

    def __init__(self, components):
        """Store the numbers produced by the iterable ``components``."""
        self._component = array(self.typecode, components)

    def __iter__(self):
        """Iterate over the components."""
        return iter(self._component)

    def __repr__(self):
        """Return ``Vector([...])``, abbreviated with ``...`` for long vectors."""
        # reprlib.repr gives a length-limited string such as "array('d', [0.0, 1.0, ...])"
        components = reprlib.repr(self._component)
        start = components.find('[')
        if start < 0:
            # no bracketed list in the representation (empty vector)
            return 'Vector([])'
        # keep only the list part: drop the leading "array('d', " and the trailing ")"
        components = components[start:-1]
        return 'Vector({})'.format(components)

    def __str__(self):
        """Return the components formatted like a tuple."""
        return str(tuple(self))

    def __bytes__(self):
        """Return the typecode as one byte followed by the raw component bytes."""
        # bytes([n]) builds a one-byte object from the integer n; subscripting
        # the type itself (bytes[n]) raises TypeError.
        return bytes([ord(self.typecode)]) + bytes(self._component)

    def __eq__(self, other):
        """Compare by converting both operands to tuples."""
        return tuple(self) == tuple(other)

    def __abs__(self):
        """Return the Euclidean norm."""
        return math.sqrt(sum(x * x for x in self))

    def __bool__(self):
        """Return False only when the norm is zero."""
        return bool(abs(self))

    @classmethod
    def frombytes(cls, octets):
        """Rebuild a vector from bytes laid out as ``__bytes__`` produces them."""
        typecode = chr(octets[0])  # the first byte holds the typecode
        memv = memoryview(octets[1:]).cast(typecode)
        # the constructor takes an iterable, so the memoryview is passed as is (no * unpacking)
        return cls(memv)

    def __len__(self):
        """Return the number of components."""
        return len(self._component)

    def __getitem__(self, index):
        """Return one component for an integer index, or a new vector for a slice.

        Raises:
            TypeError: for any other kind of index, e.g. a tuple such as ``v[1, 2]``.
        """
        cls = type(self)
        if isinstance(index, slice):
            # wrap the sliced array in a new instance of the same class
            return cls(self._component[index])
        elif isinstance(index, numbers.Integral):
            return self._component[index]
        else:
            # NOTE(review): 'itegers' is a typo for 'integers'; the text is kept
            # unchanged because the recorded cell output shows this exact message.
            msg = '{cls.__name__} indices must be itegers'
            raise TypeError(msg.format(cls=cls))

In [32]:
v7=Vector(range(7))

In [33]:
v7[-1]

6.0

In [34]:
v7[1:4]

Vector([1.0, 2.0, 3.0])

In [35]:
v7[-1:]

Vector([6.0])

In [37]:
v7[1,2] #不支持多维索引

TypeError: Vector indices must be itegers

10.5 Vector类第三版：动态存取属性

![chapter10-5](image/chapter10-5.png)

In [38]:
#添加__getattr__
from array import array
import reprlib
import math
import numbers

# A sequence type's constructor should preferably take an iterable as its argument, because all built-in sequence types do
class Vector:
    """N-dimensional vector with read access to the first components as x, y, z, t.

    This version defines no ``__setattr__``, so assigning ``v.x = value``
    creates an ordinary instance attribute and leaves the components untouched.
    """

    typecode = 'd'  # array typecode for storage; also the first byte of bytes(v)
    shotcut_names = 'xyzt'  # letter at position i is the shortcut for component i

    def __getattr__(self, name):
        """Resolve the shortcuts x, y, z, t to the first four components.

        Python calls this only when normal attribute lookup fails.

        Raises:
            AttributeError: for any other name, or when the vector has no
                component at the shortcut's position.
        """
        cls = type(self)
        if len(name) == 1:
            pos = cls.shotcut_names.find(name)  # -1 when name is not a shortcut
            if 0 <= pos < len(self._component):
                return self._component[pos]
        msg = '{.__name__!r} object has no attribute {!r}'
        raise AttributeError(msg.format(cls, name))

    def __init__(self, components):
        """Store the numbers produced by the iterable ``components``."""
        self._component = array(self.typecode, components)

    def __iter__(self):
        """Iterate over the components."""
        return iter(self._component)

    def __repr__(self):
        """Return ``Vector([...])``, abbreviated with ``...`` for long vectors."""
        # reprlib.repr gives a length-limited string such as "array('d', [0.0, 1.0, ...])"
        components = reprlib.repr(self._component)
        start = components.find('[')
        if start < 0:
            # no bracketed list in the representation (empty vector)
            return 'Vector([])'
        # keep only the list part: drop the leading "array('d', " and the trailing ")"
        components = components[start:-1]
        return 'Vector({})'.format(components)

    def __str__(self):
        """Return the components formatted like a tuple."""
        return str(tuple(self))

    def __bytes__(self):
        """Return the typecode as one byte followed by the raw component bytes."""
        # bytes([n]) builds a one-byte object from the integer n; subscripting
        # the type itself (bytes[n]) raises TypeError.
        return bytes([ord(self.typecode)]) + bytes(self._component)

    def __eq__(self, other):
        """Compare by converting both operands to tuples."""
        return tuple(self) == tuple(other)

    def __abs__(self):
        """Return the Euclidean norm."""
        return math.sqrt(sum(x * x for x in self))

    def __bool__(self):
        """Return False only when the norm is zero."""
        return bool(abs(self))

    @classmethod
    def frombytes(cls, octets):
        """Rebuild a vector from bytes laid out as ``__bytes__`` produces them."""
        typecode = chr(octets[0])  # the first byte holds the typecode
        memv = memoryview(octets[1:]).cast(typecode)
        # the constructor takes an iterable, so the memoryview is passed as is (no * unpacking)
        return cls(memv)

    def __len__(self):
        """Return the number of components."""
        return len(self._component)

    def __getitem__(self, index):
        """Return one component for an integer index, or a new vector for a slice.

        Raises:
            TypeError: for any other kind of index, e.g. a tuple such as ``v[1, 2]``.
        """
        cls = type(self)
        if isinstance(index, slice):
            # wrap the sliced array in a new instance of the same class
            return cls(self._component[index])
        elif isinstance(index, numbers.Integral):
            return self._component[index]
        else:
            msg = '{cls.__name__} indices must be integers'
            raise TypeError(msg.format(cls=cls))

In [39]:
v=Vector(range(10))

In [40]:
v.x

0.0

In [41]:
v.y,v.z,v.t

(1.0, 2.0, 3.0)

In [42]:
#不恰当的行为：为v.x赋值没有抛出错误，但前后矛盾
v=Vector(range(5))
v

Vector([0.0, 1.0, 2.0, 3.0, 4.0])

In [43]:
v.x

0.0

In [44]:
v.x=10
v.x

10

In [45]:
v

Vector([0.0, 1.0, 2.0, 3.0, 4.0])

In [58]:
 #添加__setattr__
from array import array
import reprlib
import math
import numbers

# A sequence type's constructor should preferably take an iterable as its argument, because all built-in sequence types do
class Vector:
    """N-dimensional vector with read-only shortcuts x, y, z, t for the first components."""

    typecode = 'd'  # array typecode for storage; also the first byte of bytes(v)
    shotcut_names = 'xyzt'  # letter at position i is the shortcut for component i

    def __getattr__(self, name):
        """Resolve the shortcuts x, y, z, t to the first four components.

        Python calls this only when normal attribute lookup fails.

        Raises:
            AttributeError: for any other name, or when the vector has no
                component at the shortcut's position.
        """
        cls = type(self)
        if len(name) == 1:
            pos = cls.shotcut_names.find(name)  # -1 when name is not a shortcut
            if 0 <= pos < len(self._component):
                return self._component[pos]
        msg = '{.__name__!r} object has no attribute {!r}'
        raise AttributeError(msg.format(cls, name))

    def __init__(self, components):
        """Store the numbers produced by the iterable ``components``."""
        self._component = array(self.typecode, components)

    def __iter__(self):
        """Iterate over the components."""
        return iter(self._component)

    def __repr__(self):
        """Return ``Vector([...])``, abbreviated with ``...`` for long vectors."""
        # reprlib.repr gives a length-limited string such as "array('d', [0.0, 1.0, ...])"
        components = reprlib.repr(self._component)
        start = components.find('[')
        if start < 0:
            # no bracketed list in the representation (empty vector)
            return 'Vector([])'
        # keep only the list part: drop the leading "array('d', " and the trailing ")"
        components = components[start:-1]
        return 'Vector({})'.format(components)

    def __str__(self):
        """Return the components formatted like a tuple."""
        return str(tuple(self))

    def __bytes__(self):
        """Return the typecode as one byte followed by the raw component bytes."""
        # bytes([n]) builds a one-byte object from the integer n; subscripting
        # the type itself (bytes[n]) raises TypeError.
        return bytes([ord(self.typecode)]) + bytes(self._component)

    def __eq__(self, other):
        """Compare by converting both operands to tuples."""
        return tuple(self) == tuple(other)

    def __abs__(self):
        """Return the Euclidean norm."""
        return math.sqrt(sum(x * x for x in self))

    def __bool__(self):
        """Return False only when the norm is zero."""
        return bool(abs(self))

    @classmethod
    def frombytes(cls, octets):
        """Rebuild a vector from bytes laid out as ``__bytes__`` produces them."""
        typecode = chr(octets[0])  # the first byte holds the typecode
        memv = memoryview(octets[1:]).cast(typecode)
        # the constructor takes an iterable, so the memoryview is passed as is (no * unpacking)
        return cls(memv)

    def __len__(self):
        """Return the number of components."""
        return len(self._component)

    def __getitem__(self, index):
        """Return one component for an integer index, or a new vector for a slice.

        Raises:
            TypeError: for any other kind of index, e.g. a tuple such as ``v[1, 2]``.
        """
        cls = type(self)
        if isinstance(index, slice):
            # wrap the sliced array in a new instance of the same class
            return cls(self._component[index])
        elif isinstance(index, numbers.Integral):
            return self._component[index]
        else:
            msg = '{cls.__name__} indices must be integers'
            raise TypeError(msg.format(cls=cls))

    def __setattr__(self, name, value):
        """Set an attribute, rejecting single lowercase-letter names.

        Not every assignment is blocked: only one-character lowercase names,
        to avoid confusion with the read-only shortcuts x, y, z, t.

        Raises:
            AttributeError: when ``name`` is a shortcut or any other single
                lowercase letter.
        """
        cls = type(self)
        if len(name) == 1:  # only single-character names get special handling
            if name in cls.shotcut_names:  # one of the shortcut letters
                error = 'readonly attribute {attr_name!r}'
            elif name.islower():  # any other lowercase letter
                error = "can't set attributes 'a' to 'z' in {cls_name!r}"
            else:
                error = ''  # e.g. an uppercase letter: allowed
            if error:
                msg = error.format(cls_name=cls.__name__, attr_name=name)
                raise AttributeError(msg)
        super().__setattr__(name, value)  # default behaviour for every allowed name

In [59]:
v1=Vector(range(5))

In [60]:
v1

Vector([0.0, 1.0, 2.0, 3.0, 4.0])

In [61]:
v1.x

0.0

In [62]:
v1.x=10#__setattr__方法

AttributeError: readonly attribute 'x'

In [63]:
v1[0]=10 #__setitem__方法

TypeError: 'Vector' object does not support item assignment

In [64]:
v1[0]

0.0

In [65]:
v1.K=10

In [67]:
v1.k=10

AttributeError: can't set attributes 'a' to 'z' in 'Vector'

10.6 Vector类第四版：散列和快速等值测试

In [68]:
from functools import reduce
from operator import xor

#计算整数0~5的累计异或的三种方式
n=0
for i in range(1,6):
    n^=i
n

1

In [69]:
reduce(lambda a,b:a^b,range(6))

1

In [71]:
reduce(xor,range(6))

1

In [72]:
 #添加__hash__方法，与__eq__方法一起使用
from array import array
import reprlib
import math
import numbers
import functools
import operator

# A sequence type's constructor should preferably take an iterable as its argument, because all built-in sequence types do
class Vector:
    """Hashable N-dimensional vector with read-only shortcuts x, y, z, t."""

    typecode = 'd'  # array typecode for storage; also the first byte of bytes(v)
    shotcut_names = 'xyzt'  # letter at position i is the shortcut for component i

    def __getattr__(self, name):
        """Resolve the shortcuts x, y, z, t to the first four components.

        Python calls this only when normal attribute lookup fails.

        Raises:
            AttributeError: for any other name, or when the vector has no
                component at the shortcut's position.
        """
        cls = type(self)
        if len(name) == 1:
            pos = cls.shotcut_names.find(name)  # -1 when name is not a shortcut
            if 0 <= pos < len(self._component):
                return self._component[pos]
        msg = '{.__name__!r} object has no attribute {!r}'
        raise AttributeError(msg.format(cls, name))

    def __init__(self, components):
        """Store the numbers produced by the iterable ``components``."""
        self._component = array(self.typecode, components)

    def __iter__(self):
        """Iterate over the components."""
        return iter(self._component)

    def __repr__(self):
        """Return ``Vector([...])``, abbreviated with ``...`` for long vectors."""
        # reprlib.repr gives a length-limited string such as "array('d', [0.0, 1.0, ...])"
        components = reprlib.repr(self._component)
        start = components.find('[')
        if start < 0:
            # no bracketed list in the representation (empty vector)
            return 'Vector([])'
        # keep only the list part: drop the leading "array('d', " and the trailing ")"
        components = components[start:-1]
        return 'Vector({})'.format(components)

    def __str__(self):
        """Return the components formatted like a tuple."""
        return str(tuple(self))

    def __bytes__(self):
        """Return the typecode as one byte followed by the raw component bytes."""
        # bytes([n]) builds a one-byte object from the integer n; subscripting
        # the type itself (bytes[n]) raises TypeError.
        return bytes([ord(self.typecode)]) + bytes(self._component)

    def __eq__(self, other):
        """Compare by converting both operands to tuples."""
        return tuple(self) == tuple(other)

    def __abs__(self):
        """Return the Euclidean norm."""
        return math.sqrt(sum(x * x for x in self))

    def __bool__(self):
        """Return False only when the norm is zero."""
        return bool(abs(self))

    @classmethod
    def frombytes(cls, octets):
        """Rebuild a vector from bytes laid out as ``__bytes__`` produces them."""
        typecode = chr(octets[0])  # the first byte holds the typecode
        memv = memoryview(octets[1:]).cast(typecode)
        # the constructor takes an iterable, so the memoryview is passed as is (no * unpacking)
        return cls(memv)

    def __len__(self):
        """Return the number of components."""
        return len(self._component)

    def __getitem__(self, index):
        """Return one component for an integer index, or a new vector for a slice.

        Raises:
            TypeError: for any other kind of index, e.g. a tuple such as ``v[1, 2]``.
        """
        cls = type(self)
        if isinstance(index, slice):
            # wrap the sliced array in a new instance of the same class
            return cls(self._component[index])
        elif isinstance(index, numbers.Integral):
            return self._component[index]
        else:
            msg = '{cls.__name__} indices must be integers'
            raise TypeError(msg.format(cls=cls))

    def __setattr__(self, name, value):
        """Set an attribute, rejecting single lowercase-letter names.

        Not every assignment is blocked: only one-character lowercase names,
        to avoid confusion with the read-only shortcuts x, y, z, t.

        Raises:
            AttributeError: when ``name`` is a shortcut or any other single
                lowercase letter.
        """
        cls = type(self)
        if len(name) == 1:  # only single-character names get special handling
            if name in cls.shotcut_names:  # one of the shortcut letters
                error = 'readonly attribute {attr_name!r}'
            elif name.islower():  # any other lowercase letter
                error = "can't set attributes 'a' to 'z' in {cls_name!r}"
            else:
                error = ''  # e.g. an uppercase letter: allowed
            if error:
                msg = error.format(cls_name=cls.__name__, attr_name=name)
                raise AttributeError(msg)
        super().__setattr__(name, value)  # default behaviour for every allowed name

    def __hash__(self):
        """Return the XOR of the hashes of all components (0 for an empty vector)."""
        hashes = (hash(x) for x in self._component)
        return functools.reduce(operator.xor, hashes, 0)  # 0 is the initial value
    
    

In [73]:
#映射过程计算各个分量的散列值，归约过程则使用xor运算符聚合所有散列值。把生成器表达式替换成map方法，映射过程更加明显
def __hash__(self):
    """Return the XOR of the hashes of all components (0 when there are none).

    Alternative spelling of ``Vector.__hash__`` in which ``map`` makes the
    mapping step explicit and ``reduce`` performs the aggregation.
    """
    # the Vector classes in this file store their data in ``_component``
    hashes = map(hash, self._component)
    # the initial value 0 keeps reduce from failing on an empty sequence
    return reduce(xor, hashes, 0)
'''
在python2中，使用map效率会低些，因为map函数要使用结果构建一个列表。
但在python3中，map函数是惰性的，它会创建一个生成器，按需产出结果，因此能节省内存
'''

In [82]:
#原来的__eq__方法，会完整复制两个操作数，构建两个元组，只为了使用tuple类型的__eq__方法。
# 对Vector2d(只有两个分量)来说，这是个捷径，但是对维数很多的向量就不同了
#为了提高效率，Vector.__eq__方法在for循环中使用zip函数

from array import array
import reprlib
import math
import numbers
import functools
import operator

# A sequence type's constructor should preferably take an iterable as its argument, because all built-in sequence types do
class Vector:
    """Hashable N-dimensional vector whose ``==`` avoids copying the operands."""

    typecode = 'd'  # array typecode for storage; also the first byte of bytes(v)
    shotcut_names = 'xyzt'  # letter at position i is the shortcut for component i

    def __getattr__(self, name):
        """Resolve the shortcuts x, y, z, t to the first four components.

        Python calls this only when normal attribute lookup fails.

        Raises:
            AttributeError: for any other name, or when the vector has no
                component at the shortcut's position.
        """
        cls = type(self)
        if len(name) == 1:
            pos = cls.shotcut_names.find(name)  # -1 when name is not a shortcut
            if 0 <= pos < len(self._component):
                return self._component[pos]
        msg = '{.__name__!r} object has no attribute {!r}'
        raise AttributeError(msg.format(cls, name))

    def __init__(self, components):
        """Store the numbers produced by the iterable ``components``."""
        self._component = array(self.typecode, components)

    def __iter__(self):
        """Iterate over the components."""
        return iter(self._component)

    def __repr__(self):
        """Return ``Vector([...])``, abbreviated with ``...`` for long vectors."""
        # reprlib.repr gives a length-limited string such as "array('d', [0.0, 1.0, ...])"
        components = reprlib.repr(self._component)
        start = components.find('[')
        if start < 0:
            # no bracketed list in the representation (empty vector)
            return 'Vector([])'
        # keep only the list part: drop the leading "array('d', " and the trailing ")"
        components = components[start:-1]
        return 'Vector({})'.format(components)

    def __str__(self):
        """Return the components formatted like a tuple."""
        return str(tuple(self))

    def __bytes__(self):
        """Return the typecode as one byte followed by the raw component bytes."""
        # bytes([n]) builds a one-byte object from the integer n; subscripting
        # the type itself (bytes[n]) raises TypeError.
        return bytes([ord(self.typecode)]) + bytes(self._component)

    def __eq__(self, other):
        """Return True when both operands have equal length and equal items pairwise."""
        if len(self) != len(other):
            return False
        for a, b in zip(self, other):  # zip yields tuples of paired items lazily
            if a != b:
                return False
        return True

    def __abs__(self):
        """Return the Euclidean norm."""
        return math.sqrt(sum(x * x for x in self))

    def __bool__(self):
        """Return False only when the norm is zero."""
        return bool(abs(self))

    @classmethod
    def frombytes(cls, octets):
        """Rebuild a vector from bytes laid out as ``__bytes__`` produces them."""
        typecode = chr(octets[0])  # the first byte holds the typecode
        memv = memoryview(octets[1:]).cast(typecode)
        # the constructor takes an iterable, so the memoryview is passed as is (no * unpacking)
        return cls(memv)

    def __len__(self):
        """Return the number of components."""
        return len(self._component)

    def __getitem__(self, index):
        """Return one component for an integer index, or a new vector for a slice.

        Raises:
            TypeError: for any other kind of index, e.g. a tuple such as ``v[1, 2]``.
        """
        cls = type(self)
        if isinstance(index, slice):
            # wrap the sliced array in a new instance of the same class
            return cls(self._component[index])
        elif isinstance(index, numbers.Integral):
            return self._component[index]
        else:
            msg = '{cls.__name__} indices must be integers'
            raise TypeError(msg.format(cls=cls))

    def __setattr__(self, name, value):
        """Set an attribute, rejecting single lowercase-letter names.

        Not every assignment is blocked: only one-character lowercase names,
        to avoid confusion with the read-only shortcuts x, y, z, t.

        Raises:
            AttributeError: when ``name`` is a shortcut or any other single
                lowercase letter.
        """
        cls = type(self)
        if len(name) == 1:  # only single-character names get special handling
            if name in cls.shotcut_names:  # one of the shortcut letters
                error = 'readonly attribute {attr_name!r}'
            elif name.islower():  # any other lowercase letter
                error = "can't set attributes 'a' to 'z' in {cls_name!r}"
            else:
                error = ''  # e.g. an uppercase letter: allowed
            if error:
                msg = error.format(cls_name=cls.__name__, attr_name=name)
                raise AttributeError(msg)
        super().__setattr__(name, value)  # default behaviour for every allowed name

    def __hash__(self):
        """Return the XOR of the hashes of all components (0 for an empty vector)."""
        hashes = (hash(x) for x in self._component)
        return functools.reduce(operator.xor, hashes, 0)  # 0 is the initial value

In [75]:
#使用zip和all函数实现Vector.__eq__方法
def __eq__(self, other):
    """Return True when both operands have equal length and equal items pairwise."""
    # Different lengths can never be equal; this also keeps zip from silently
    # ignoring the extra items of the longer operand.
    if len(self) != len(other):
        return False
    for mine, theirs in zip(self, other):
        if not mine == theirs:
            return False
    return True


In [76]:
#出色的zip
zip(range(3),'ABC')

<zip at 0x289c85db048>

In [77]:
list(zip(range(3),'ABC'))

[(0, 'A'), (1, 'B'), (2, 'C')]

In [80]:
list(zip(range(3),'ABC',[0,1,2,3]))

[(0, 'A', 0), (1, 'B', 1), (2, 'C', 2)]

In [81]:
from itertools import zip_longest
list(zip_longest(range(3),'ABC',[0,1,2,3]))

[(0, 'A', 0), (1, 'B', 1), (2, 'C', 2), (None, None, 3)]

10.7 Vector类第五版：格式化

In [None]:

from array import array
import reprlib
import math
import numbers
import functools
import operator
import itertools

# A sequence type's constructor should preferably take an iterable as its argument, because all built-in sequence types do
class Vector:
    """Hashable N-dimensional vector with custom ``format()`` support.

    A format spec ending in ``'h'`` selects hyperspherical coordinates;
    anything else formats the Cartesian components.
    """

    typecode = 'd'  # array typecode for storage; also the first byte of bytes(v)
    shotcut_names = 'xyzt'  # letter at position i is the shortcut for component i

    def __getattr__(self, name):
        """Resolve the shortcuts x, y, z, t to the first four components.

        Python calls this only when normal attribute lookup fails.

        Raises:
            AttributeError: for any other name, or when the vector has no
                component at the shortcut's position.
        """
        cls = type(self)
        if len(name) == 1:
            pos = cls.shotcut_names.find(name)  # -1 when name is not a shortcut
            if 0 <= pos < len(self._component):
                return self._component[pos]
        msg = '{.__name__!r} object has no attribute {!r}'
        raise AttributeError(msg.format(cls, name))

    def __init__(self, components):
        """Store the numbers produced by the iterable ``components``."""
        self._component = array(self.typecode, components)

    def __iter__(self):
        """Iterate over the components."""
        return iter(self._component)

    def __repr__(self):
        """Return ``Vector([...])``, abbreviated with ``...`` for long vectors."""
        # reprlib.repr gives a length-limited string such as "array('d', [0.0, 1.0, ...])"
        components = reprlib.repr(self._component)
        start = components.find('[')
        if start < 0:
            # no bracketed list in the representation (empty vector)
            return 'Vector([])'
        # keep only the list part: drop the leading "array('d', " and the trailing ")"
        components = components[start:-1]
        return 'Vector({})'.format(components)

    def __str__(self):
        """Return the components formatted like a tuple."""
        return str(tuple(self))

    def __bytes__(self):
        """Return the typecode as one byte followed by the raw component bytes."""
        # bytes([n]) builds a one-byte object from the integer n; subscripting
        # the type itself (bytes[n]) raises TypeError.
        return bytes([ord(self.typecode)]) + bytes(self._component)

    def __eq__(self, other):
        """Return True when both operands have equal length and equal items pairwise."""
        if len(self) != len(other):
            return False
        for a, b in zip(self, other):  # zip yields tuples of paired items lazily
            if a != b:
                return False
        return True

    def __abs__(self):
        """Return the Euclidean norm."""
        return math.sqrt(sum(x * x for x in self))

    def __bool__(self):
        """Return False only when the norm is zero."""
        return bool(abs(self))

    @classmethod
    def frombytes(cls, octets):
        """Rebuild a vector from bytes laid out as ``__bytes__`` produces them."""
        typecode = chr(octets[0])  # the first byte holds the typecode
        memv = memoryview(octets[1:]).cast(typecode)
        # the constructor takes an iterable, so the memoryview is passed as is (no * unpacking)
        return cls(memv)

    def __len__(self):
        """Return the number of components."""
        return len(self._component)

    def __getitem__(self, index):
        """Return one component for an integer index, or a new vector for a slice.

        Raises:
            TypeError: for any other kind of index, e.g. a tuple such as ``v[1, 2]``.
        """
        cls = type(self)
        if isinstance(index, slice):
            # wrap the sliced array in a new instance of the same class
            return cls(self._component[index])
        elif isinstance(index, numbers.Integral):
            return self._component[index]
        else:
            msg = '{cls.__name__} indices must be integers'
            raise TypeError(msg.format(cls=cls))

    def __setattr__(self, name, value):
        """Set an attribute, rejecting single lowercase-letter names.

        Not every assignment is blocked: only one-character lowercase names,
        to avoid confusion with the read-only shortcuts x, y, z, t.

        Raises:
            AttributeError: when ``name`` is a shortcut or any other single
                lowercase letter.
        """
        cls = type(self)
        if len(name) == 1:  # only single-character names get special handling
            if name in cls.shotcut_names:  # one of the shortcut letters
                error = 'readonly attribute {attr_name!r}'
            elif name.islower():  # any other lowercase letter
                error = "can't set attributes 'a' to 'z' in {cls_name!r}"
            else:
                error = ''  # e.g. an uppercase letter: allowed
            if error:
                msg = error.format(cls_name=cls.__name__, attr_name=name)
                raise AttributeError(msg)
        super().__setattr__(name, value)  # default behaviour for every allowed name

    def __hash__(self):
        """Return the XOR of the hashes of all components (0 for an empty vector)."""
        hashes = (hash(x) for x in self._component)
        return functools.reduce(operator.xor, hashes, 0)  # 0 is the initial value

    def angle(self, n):
        """Return the n-th angular coordinate used by the hyperspherical format.

        The angle is ``atan2(r, self[n-1])``, where ``r`` is the norm of the
        components from index ``n`` onward.
        """
        r = math.sqrt(sum(x * x for x in self[n:]))
        a = math.atan2(r, self[n-1])
        # the last angle is reflected to 2*pi - a when the last component is negative
        if (n == len(self) - 1) and (self[-1] < 0):
            return math.pi * 2 - a
        else:
            return a

    def angles(self):
        """Return a generator of the angular coordinates for n = 1 .. len(self) - 1."""
        return (self.angle(n) for n in range(1, len(self)))

    def __format__(self, fmt_spec=''):
        """Format every coordinate with ``fmt_spec`` and join them with ``', '``.

        A spec ending in ``'h'`` yields ``<magnitude, angle, ...>``; the ``'h'``
        is stripped and the rest is applied to each value. Any other spec
        yields the Cartesian components as ``(c0, c1, ...)``.
        """
        if fmt_spec.endswith('h'):  # hyperspherical coordinates
            fmt_spec = fmt_spec[:-1]
            # magnitude first, then all the angles, produced lazily
            coords = itertools.chain([abs(self)],
                                     self.angles())
            outer_fmt = '<{}>'
        else:
            coords = self
            outer_fmt = '({})'
        components = (format(c, fmt_spec) for c in coords)  # format each coordinate lazily
        return outer_fmt.format(', '.join(components))