# 10_序列的修改、散列和切片

- `gensim`: 使用 numpy 和 scipy 实现了用于处理自然语言和检索信息的向量空间模型。
- `reprlib`：生成有限长度的表现形式。

import numbers
import reprlib
from array import array
from math import atan2, pi, sqrt

class Vector:
    """An n-dimensional vector of numbers stored in an ``array``.

    Instances behave as read-only sequences: they support ``len()``,
    iteration, integer indexing and slicing (a slice returns a new
    ``Vector``).  The first four components can also be read through the
    single-letter attributes ``x``, ``y``, ``z`` and ``t``.
    """

    # Typecode handed to ``array`` for storage; also written as the first
    # byte of ``bytes(vector)`` and read back by ``frombytes``.
    typecode = 'd'
    # Single-letter attribute names mapped, by position, to the first components.
    shortcut_names = 'xyzt'

    def __init__(self, components):
        """Build a vector from any iterable of numbers."""
        self._components = array(self.typecode, components)

    def __repr__(self):
        """Return ``Vector([...])``, abbreviated by ``reprlib`` when long."""
        components = reprlib.repr(self._components)
        start = components.find('[')
        if start < 0:
            # reprlib renders an empty array as "array('d')", without
            # brackets; show an explicit empty list so the repr stays valid.
            components = '[]'
        else:
            # Keep "[...]" only: drop the "array('d', " prefix and the ")".
            components = components[start:-1]
        return "Vector({})".format(components)

    def __str__(self):
        """Return the components formatted as a tuple."""
        return str(tuple(self))

    def __format__(self, fmt_spec=''):
        """Format every coordinate with ``fmt_spec``.

        A spec ending in ``'p'`` selects polar (in general: hyperspherical)
        coordinates, rendered as ``<r, angle1, ...>``; the trailing ``'p'``
        is stripped before the rest is applied to each number.  Any other
        spec renders the Cartesian components as ``(c1, c2, ...)``.
        """
        if fmt_spec.endswith('p'):
            fmt_spec = fmt_spec[:-1]
            coords = [abs(self), *self.angles()]
            outer_fmt = '<{}>'
        else:
            coords = self
            outer_fmt = '({})'
        components = (format(c, fmt_spec) for c in coords)
        # Join instead of using fixed "{}, {}" slots so any dimension works.
        return outer_fmt.format(', '.join(components))

    def __bytes__(self):
        """Return the typecode byte followed by the raw component bytes."""
        return (bytes([ord(self.typecode)]) + bytes(self._components))

    def __iter__(self):
        """Iterate over the components."""
        return iter(self._components)

    def __hash__(self):
        """Hash by component values so that equal vectors hash equally."""
        return hash(tuple(self))

    def __abs__(self):
        """Return the Euclidean norm: the square root of the sum of squares."""
        return sqrt(sum(x * x for x in self))

    def __bool__(self):
        """Return False only for a vector whose norm is zero."""
        return bool(abs(self))

    def __eq__(self, other):
        """Compare component-wise with another ``Vector``.

        Returns ``NotImplemented`` for any other type so that Python can try
        the reflected comparison and finally fall back to ``False`` instead
        of raising.
        """
        if not isinstance(other, Vector):
            return NotImplemented
        return (len(self) == len(other)
                and all(a == b for a, b in zip(self, other)))

    # TODO: n-dimensional __add__ and __mul__ are not implemented yet.

    def angle(self, n):
        """Return the n-th angular coordinate (``1 <= n < len(self)``).

        It is the angle between axis ``n - 1`` and the remaining components
        ``self[n:]``.  The last angle is mapped to ``[0, 2*pi)`` so that the
        sign of the last component is preserved.
        """
        r = sqrt(sum(x * x for x in self[n:]))
        a = atan2(r, self[n - 1])
        if n == len(self) - 1 and self[-1] < 0:
            return pi * 2 - a
        return a

    def angles(self):
        """Return a generator over all angular coordinates."""
        return (self.angle(n) for n in range(1, len(self)))

    @classmethod
    def frombytes(cls, octests):
        """Rebuild a vector from the bytes produced by ``bytes(vector)``.

        The first byte is the array typecode; the rest is the raw data.
        """
        typecode = chr(octests[0])
        memv = memoryview(octests[1:]).cast(typecode)
        return cls(memv)

    # Sliceable sequence protocol
    def __len__(self):
        """Return the number of components."""
        return len(self._components)

    def __getitem__(self, index):
        """Return one component for an integer, or a new vector for a slice.

        Raises:
            TypeError: if ``index`` is neither an integer nor a slice.
        """
        cls = type(self)
        if isinstance(index, slice):
            return cls(self._components[index])
        elif isinstance(index, numbers.Integral):
            return self._components[index]
        else:
            msg = '{cls.__name__} indices must be integers'
            raise TypeError(msg.format(cls=cls))

    # Dynamic attribute access
    def __getattr__(self, name):
        """Resolve ``x``/``y``/``z``/``t`` to the matching component.

        Python calls this only after normal attribute lookup has failed.

        Raises:
            AttributeError: for any other name, or when the vector has too
                few components for the requested shortcut.
        """
        cls = type(self)
        if len(name) == 1:
            pos = cls.shortcut_names.find(name)
            if 0 <= pos < len(self._components):
                return self._components[pos]
        msg = '{.__name__!r} obj has no attribute {!r}'
        raise AttributeError(msg.format(cls, name))

    def __setattr__(self, name, value):
        """Reject assignment to single lowercase-letter attribute names.

        Without this guard, ``v.x = 10`` would create an instance attribute
        that shadows the component read through ``__getattr__``.

        Raises:
            AttributeError: if ``name`` is a shortcut name or any other
                single lowercase letter.
        """
        cls = type(self)
        if len(name) == 1:
            if name in cls.shortcut_names:
                error = "readonly attribute {attr_name!r}"
            elif name.islower():
                error = "can't set attributes 'a' to 'z' in {cls_name!r}"
            else:
                error = ''
            if error:
                msg = error.format(cls_name=cls.__name__, attr_name=name)
                raise AttributeError(msg)
        super().__setattr__(name, value)

In [11]:
# Check how reprlib abbreviates a long list, and what remains after slicing
# from the first '[' up to (but excluding) the final character.
import reprlib
lst = reprlib.repr(list(range(100)))
print(lst, lst.find('['), sep='\n')
lst1 = lst[lst.find('['):len(lst) - 1]
lst1

[0, 1, 2, 3, 4, 5, ...]
0


'[0, 1, 2, 3, 4, 5, ...'

In [64]:
# Slicing demo: len(), integer indexing and slicing on a Vector.
v1 = Vector(range(10))
print(f"len(v1) \t {len(v1)}")
print(f"v1[0] \t\t {v1[0]}")
print(f"v1[:2] \t\t {v1[:2]}")
print(f"v1[1:3] \t {v1[1:3]}")
try:
    # A tuple index is neither an integer nor a slice, so
    # Vector.__getitem__ raises TypeError; catch only that error.
    print(f"v1[1,2] \t {v1[1,2]}")
except TypeError as e:
    print(f"Error: {e}")

# A slice returns a new Vector (displayed through its repr in a notebook).
v1[1:7]

len(v1) 	 10
v1[0] 		 0.0
v1[:2] 		 (0.0, 1.0)
v1[1:3] 	 (1.0, 2.0)
Error: Vector indices must be integers


Vector([1.0, 2.0, 3.0, 4.0, 5.0, ...])

In [84]:
# Dynamic attribute access: read the first component through its shortcut name.
v = Vector(range(5))
print(f"v.x \t {v.x}")

# Vector.__setattr__ rejects assignment to the shortcut names, so the next
# line raises AttributeError and the final print is never reached.
v.x = 10
print(f"v.x \t {v.x}")

v.x 	 0.0


AttributeError: readonly attribute 'x'

## 10.1 协议和鸭子类型

- 协议：非正式的接口，只在文档中约定、不由解释器强制检查；类只需实现协议中实际用到的那部分方法即可。
- 鸭子类型：不关心对象的具体类型，只关心对象是否具备所需的方法和行为。

- Python 序列协议：实现 `__len__` 和 `__getitem__` 方法。

In [15]:
import collections

# A playing card is an immutable (rank, suit) record.
Card = collections.namedtuple('Card', 'rank suit')


class FrenchDeck:
    """A 52-card deck that supports len(), indexing and slicing."""

    ranks = list(map(str, range(2, 11))) + ['J', 'Q', 'K', 'A']
    suits = ['spades', 'diamonds', 'clubs', 'hearts']

    def __init__(self):
        # Build the deck suit by suit, each suit ordered by rank.
        deck = []
        for suit in self.suits:
            for rank in self.ranks:
                deck.append(Card(rank, suit))
        self._cards = deck

    def __len__(self):
        """Return the number of cards in the deck."""
        return len(self._cards)

    def __getitem__(self, position):
        """Delegate integer and slice lookups to the underlying list."""
        return self._cards[position]

In [19]:
# FrenchDeck implements __len__ and __getitem__, so len(), integer indexing
# and slicing all work on it.
card = FrenchDeck()
print(f"len(card) \t {len(card)}")
print(f"card[10] \t {card[10]}")
print(f"card[10:12] \t {card[10:12]}")

len(card) 	 52
card[10] 	 Card(rank='Q', suit='spades')
card[10:12] 	 [Card(rank='Q', suit='spades'), Card(rank='K', suit='spades')]


## 10.2 切片原理

In [29]:
class MySeq:
    """Helper that exposes what the interpreter passes to ``__getitem__``."""

    def __getitem__(self, index):
        """Return the received index object unchanged."""
        return index

In [35]:
# Show which object each subscript form hands to __getitem__:
# a plain integer, a slice, or a tuple when the subscript contains commas.
s = MySeq()
print(f"s[1] \t {s[1]}")
print(f"s[1:3] \t {s[1:3]}")
print(f"s[1:3:2] \t {s[1:3:2]}")
print(f"s[1:3:2, 9] \t {s[1:3:2, 9]}")
print(f"s[1:3:2, 7:9] \t {s[1:3:2, 7:9]}")

s[1] 	 1
s[1:3] 	 slice(1, 3, None)
s[1:3:2] 	 slice(1, 3, 2)
s[1:3:2, 9] 	 (slice(1, 3, 2), 9)
s[1:3:2, 7:9] 	 (slice(1, 3, 2), slice(7, 9, None))


In [40]:
# The same subscripts on a built-in list: integers and slices work,
# while a tuple index is rejected with TypeError.
s = list(range(10))
print(f"s[1] \t\t {s[1]}")
print(f"s[1:3] \t\t {s[1:3]}")
print(f"s[1:3:2] \t {s[1:3:2]}")
try:
    # The first tuple subscript already raises, so the second print
    # is not reached; only the expected TypeError is caught.
    print(f"s[1:3:2, 9] \t {s[1:3:2, 9]}")
    print(f"s[1:3:2, 7:9] \t {s[1:3:2, 7:9]}")
except TypeError as e:
    print(f"Error: {e}")

s[1] 		 1
s[1:3] 		 [1, 2]
s[1:3:2] 	 [1]
Error: list indices must be integers or slices, not tuple


In [49]:
# Inspect the built-in slice type and its attributes.
print(f"slice \t {slice}")
print(f"dir(slice) \n {dir(slice)}")
print(f"slice.indices \t {slice.indices}")

# slice.indices(length) returns the (start, stop, step) tuple adjusted to a
# sequence of the given length: missing values are filled in, negative
# indices are resolved and out-of-range bounds are clipped.
print(f"slice(None, 10, 2).indices(5) \t {slice(None, 10, 2).indices(5)}")
print(f"slice(-3, None, None).indices(5) \t {slice(-3, None, None).indices(5)}")

slice 	 <class 'slice'>
dir(slice) 
 ['__class__', '__delattr__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', 'indices', 'start', 'step', 'stop']
slice.indices 	 <method 'indices' of 'slice' objects>
slice(None, 10, 2).indices(5) 	 (0, 5, 2)
slice(-3, None, None).indices(5) 	 (2, 5, 1)
