## 自定义数据类型

In [82]:
%time
from math import hypot

class Vector:
    """Two-dimensional vector supporting abs(), truth testing, ``+`` and ``*``."""

    def __init__(self, x=0, y=0):
        self.x, self.y = x, y

    def __repr__(self):
        """Return the developer-facing representation of the vector."""
        components = (self.x, self.y)
        return 'Vector(%r, %r)' % components

    def __abs__(self):
        """Return the Euclidean length computed with ``math.hypot``."""
        magnitude = hypot(self.x, self.y)
        return magnitude

    def __bool__(self):
        """Return False only when the vector's magnitude is zero."""
        return bool(abs(self))

    def __add__(self, other):
        """Return a new Vector holding the component-wise sum with *other*."""
        return Vector(self.x + other.x, self.y + other.y)

    def __mul__(self, scalar):
        """Return a new Vector with both components multiplied by *scalar*."""
        scaled_x = self.x * scalar
        scaled_y = self.y * scalar
        return Vector(scaled_x, scaled_y)

    

# Demo: Vector.__add__ builds a new Vector, so v1 itself is left unchanged.
v1 = Vector(2, 4)
v2 = Vector(2, 1)
v1 + v2  # the sum is neither assigned nor the last expression, so it is discarded
v1  # last expression of the cell: its repr is what gets displayed


CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 6.68 µs


Vector(2, 4)

In [25]:

class Tag:
    """Minimal mapping-like container backed by a plain dict.

    Supports ``tag[key]``, ``tag[key] = value`` and ``del tag[key]``.
    """

    def __init__(self):
        # Backing store for every key/value pair.
        self.item = {}

    def __getitem__(self, key):
        """Return the value stored under *key*.

        Raises:
            KeyError: if *key* has not been set.
        """
        # Read straight from the backing dict. The instance defines no
        # ``keys``/``values`` attributes, so rebuilding the dict from them
        # (as the previous version tried to) raised AttributeError.
        return self.item[key]

    def __setitem__(self, key, value):
        """Store *value* under *key*, replacing any existing entry."""
        self.item[key] = value

    def __delitem__(self, key):
        """Remove *key* from the container.

        Raises:
            KeyError: if *key* is not present.
        """
        del self.item[key]

    def __repr__(self):
        """Return ``dataset(<backing dict>)``."""
        return f'dataset({self.item})'

t = Tag()

# Item assignment is routed through Tag.__setitem__.
t['a'] = 2
t['b'] = 3
print(t)  # Tag defines no __str__, so print() uses Tag.__repr__
# Item deletion is routed through Tag.__delitem__.
del t['a']
t

dataset({'a': 2, 'b': 3})


dataset({'b': 3})

In [32]:
class CustomerDict:
    """Dict-like object that keeps its items in the instance ``__dict__``.

    Each item operation prints a trace line so the protocol calls are visible.
    """

    def __repr__(self):
        """Return the repr of the instance ``__dict__``."""
        return repr(self.__dict__)

    def __setitem__(self, name, value):
        """Store *value* under *name*; invoked by ``obj[name] = value``."""
        print ("__setitem__:Set %s Value %s"%(name, value))
        # Write to __dict__ directly rather than via self[name] = value,
        # which would call this method again and recurse forever.
        self.__dict__[name] = value

    def __getitem__(self, name):
        """Return the value stored under *name*; invoked by ``obj[name]``.

        A missing *name* is reported on stdout and yields ``None`` instead of
        raising ``KeyError``.
        """
        try:
            return self.__dict__[name]
        except KeyError:
            print ("__getitem__:No attribute named '%s'"%name)
            return None

    def __delitem__(self, name):
        """Remove *name*; invoked by ``del obj[name]``.

        Prints a trace line before deleting and the remaining items afterwards.

        Raises:
            KeyError: if *name* is not present.
        """
        print ("__delitem__:Delect attribute '%s'"%name)
        del self.__dict__[name]
        print (self.__dict__)
    
    
# Guarded demo: runs when this code is the main module, not when it is imported.
if __name__ == "__main__":
    x = CustomerDict()
    x['A']=2  # goes through CustomerDict.__setitem__, which prints a trace line
    print(x)  # CustomerDict defines no __str__, so this shows its __repr__


__setitem__:Set A Value 2
{'A': 2}


## 使用 __slots__ 类属性节省空间

In [4]:
%time
class IMDbDataset:
    """Index-addressable pairing of an encodings mapping with a label sequence.

    ``__slots__`` limits instances to the two listed attributes, so no
    per-instance ``__dict__`` is created.
    """

    __slots__ = ('encodings', 'labels')

    def __init__(self, encodings, labels):
        self.labels = labels
        self.encodings = encodings

    def __str__(self):
        """Return the user-facing description of the dataset."""
        parts = (self.encodings, self.labels)
        return 'dataset(%r, %r)' % parts

    def __getitem__(self, idx):
        """Return one sample: every encoding field at *idx* plus its label."""
        sample = {}
        for name, column in self.encodings.items():
            sample[name] = column[idx]
        # Assigned last so that it wins over any encoding field named 'labels'.
        sample['labels'] = self.labels[idx]
        return sample

    def __len__(self):
        """Return the number of labels."""
        labels = self.labels
        return len(labels)

# Sample encodings: three variable-length sequences under each feature key.
train_encodings = {'input_ids': [[101, 8667, 146, 112, 182, 170, 1423, 5650, 102],
                                 [101, 1262, 1330, 5650, 102],
                                 [101, 1262, 1103, 1304, 1304, 1314, 1141, 102]],
                   'token_type_ids': [[0, 0, 0, 0, 0, 0, 0, 0, 0],
                                      [0, 0, 0, 0, 0],
                                      [0, 0, 0, 0, 0, 0, 0, 0]],
                   'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 1],
                                      [1, 1, 1, 1, 1],
                                      [1, 1, 1, 1, 1, 1, 1, 1]]}

# One integer label per sequence (0, 1, 2, ...); list(range(...)) replaces the
# element-by-element copy comprehension.
train_labels = list(range(len(train_encodings['input_ids'])))

train_dataset = IMDbDataset(train_encodings, train_labels)

print(train_dataset,'-------')     # print() goes through IMDbDataset.__str__
print(train_dataset[0],'-------')  # indexing goes through IMDbDataset.__getitem__
train_dataset[1]['input_ids']
    

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 4.53 µs
dataset({'input_ids': [[101, 8667, 146, 112, 182, 170, 1423, 5650, 102], [101, 1262, 1330, 5650, 102], [101, 1262, 1103, 1304, 1304, 1314, 1141, 102]], 'token_type_ids': [[0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1]]}, [0, 1, 2]) -------
{'input_ids': [101, 8667, 146, 112, 182, 170, 1423, 5650, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0} -------


[101, 1262, 1330, 5650, 102]

## 数据类

In [9]:
from dataclasses import dataclass
'''
该模块提供了一个装饰器和函数，用于自动将生成的特殊方法添加到用户定义的类中，例如__init__()和 __repr__()
@dataclasses.dataclass( * , init = True , repr = True , eq = True , order = False , unsafe_hash = False , \
frozen = False , match_args = True , kw_only = False , slots = False )
'''
@dataclass
class InventoryItem:
    """Inventory record: an item name, its unit price and the quantity on hand."""
    name: str
    unit_price: float
    quantity_on_hand: int = 0

    def total_cost(self) -> float:
        """Return the unit price multiplied by the quantity on hand."""
        price, count = self.unit_price, self.quantity_on_hand
        return price * count

# quantity_on_hand is not passed, so it takes its declared default of 0.
a = InventoryItem('a', 2)
a

InventoryItem(name='a', unit_price=2, quantity_on_hand=0)

In [17]:
from dataclasses import dataclass
from dataclasses import field
"""
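dataclasses.fields( class_or_instance )
返回一个由 Field 对象组成的元组，这些对象定义了该数据类的字段。接受数据类或数据类的实例；如果传入的不是数据类或其实例，则引发 TypeError。不返回 ClassVar 或 InitVar 这类伪字段。
注意：下面的示例使用的是 dataclasses.field()（单数）：通过 init=False 将字段 c 排除在 __init__ 的参数之外，再在 __post_init__ 中为它赋值。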
"""
@dataclass
class C:
    """Two constructor-supplied numbers plus a derived field ``c``."""
    a: float
    b: float
    # Excluded from the generated __init__; filled in by __post_init__.
    c: float = field(init=False)

    def __post_init__(self):
        """Set ``c`` to ``a + b``."""
        total = self.a + self.b
        self.c = total
    
# Only a and b are passed; c is filled in by C.__post_init__ as a + b.
c = C(1,2)
c

C(a=1, b=2, c=3)

In [28]:
from dataclasses import asdict
'''
将数据类实例 instance 转换为字典（使用工厂函数 dict_factory）。
每个数据类都会被转换为由其字段组成的字典，形式为 name: value 键值对。数据类、字典、列表和元组会被递归处理。
'''
@dataclass
class Point:
    """Point with integer ``x`` and ``y`` coordinates."""
    x: int
    y: int

p = Point(10, 20)
# Expected dictionary form of p. Note that this rebinds the name `c`, which an
# earlier cell bound to a C instance.
c = {'x': 10, 'y': 20}
print(asdict(p))
assert asdict(p) == c



{'x': 10, 'y': 20}
