In [82]:
%time
from math import hypot

class Vector:
    """A two-dimensional vector supporting repr(), abs(), bool(), + and scalar *."""

    def __init__(self, x=0, y=0):
        self.x = x
        self.y = y

    def __repr__(self):
        """Return a developer-facing representation such as ``Vector(2, 4)``."""
        return 'Vector(%r, %r)' % (self.x, self.y)

    def __abs__(self):
        """Return the Euclidean length of the vector."""
        return hypot(self.x, self.y)

    def __bool__(self):
        """Return False only when the vector has zero length."""
        return bool(abs(self))

    def __add__(self, other):
        """Return a new Vector holding the component-wise sum; operands are untouched."""
        x = self.x + other.x
        y = self.y + other.y
        return Vector(x, y)

    def __mul__(self, scalar):
        """Return a new Vector with both components multiplied by ``scalar``."""
        if isinstance(scalar, Vector):
            # Vector * Vector is not defined. Without this guard the component
            # products would fall through to __rmul__ and build nested vectors
            # instead of raising TypeError.
            return NotImplemented
        return Vector(self.x * scalar, self.y * scalar)

    def __rmul__(self, scalar):
        """Support ``scalar * vector`` by delegating to ``vector * scalar``."""
        return self * scalar

# Build the two sample vectors in a single assignment.
v1, v2 = Vector(2, 4), Vector(2, 1)
# The sum is a new Vector; it is not displayed because it is not the last expression.
v1 + v2
# Displaying v1 shows that the addition left it unchanged.
v1


CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 6.68 µs


Vector(2, 4)

In [195]:

class Tag:
    """Minimal dict-backed container demonstrating ``__getitem__`` / ``__setitem__``."""

    def __init__(self):
        # Backing store for every key/value pair set on the instance.
        self.item = {}

    def __getitem__(self, key):
        """Return the value stored under ``key``.

        Raises:
            KeyError: if ``key`` has never been set.
        """
        # Read straight from the backing dict; there are no separate key/value
        # sequences to rebuild it from.
        return self.item[key]

    def __setitem__(self, key, value):
        """Store ``value`` under ``key``, replacing any previous value."""
        self.item[key] = value

    def __repr__(self):
        """Return ``dataset(<repr of the backing dict>)``."""
        # One-element tuple so the dict is always formatted as a single value.
        return 'dataset(%r)' % (self.item,)

# Create an empty Tag, then fill it through item assignment (__setitem__).
t = Tag()

t['a'] = 2
t['b'] = 3
# Last expression of the cell: displayed using Tag.__repr__.
t


dataset({'a': 2, 'b': 3})

In [164]:
%time
class IMDbDataset:
    """Index-addressable pairing of per-key encodings with a list of labels."""

    def __init__(self, encodings, labels):
        self.labels = labels
        self.encodings = encodings

    def __str__(self):
        """Return the user-facing text form: ``dataset(<encodings>, <labels>)``."""
        parts = (self.encodings, self.labels)
        return 'dataset(%r, %r)' % parts

    def __getitem__(self, idx):
        """Return one sample: every encoding key at ``idx`` plus its ``'labels'`` entry."""
        sample = {}
        for name, column in self.encodings.items():
            sample[name] = column[idx]
        sample['labels'] = self.labels[idx]
        return sample

    def __len__(self):
        """Return the number of samples, taken from the labels."""
        label_count = len(self.labels)
        return label_count

# Sample encodings: three sequences (lengths 9, 5 and 8) under each key.
train_encodings = {
    'input_ids': [
        [101, 8667, 146, 112, 182, 170, 1423, 5650, 102],
        [101, 1262, 1330, 5650, 102],
        [101, 1262, 1103, 1304, 1304, 1314, 1141, 102],
    ],
    'token_type_ids': [
        [0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0],
    ],
    'attention_mask': [
        [1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1],
    ],
}

# One integer label per sequence: 0, 1, 2.
train_labels = list(range(len(train_encodings['input_ids'])))

train_dataset = IMDbDataset(train_encodings, train_labels)

# print() uses __str__, so the whole dataset is rendered through IMDbDataset.__str__.
print(train_dataset, '-------')
# Indexing returns a dict for a single sample.
print(train_dataset[0], '-------')
# Last expression of the cell: the input ids of the second sample.
train_dataset[1]['input_ids']
    

CPU times: user 3 µs, sys: 1e+03 ns, total: 4 µs
Wall time: 23.6 µs
dataset({'input_ids': [[101, 8667, 146, 112, 182, 170, 1423, 5650, 102], [101, 1262, 1330, 5650, 102], [101, 1262, 1103, 1304, 1304, 1314, 1141, 102]], 'token_type_ids': [[0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1]]}, [0, 1, 2]) -------
{'input_ids': [101, 8667, 146, 112, 182, 170, 1423, 5650, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 0} -------


[101, 1262, 1330, 5650, 102]

## 数据类

In [86]:
from dataclasses import dataclass

@dataclass
class InventoryItem:
    """Inventory record: an item's name, unit price and quantity on hand."""
    name: str
    unit_price: float
    quantity_on_hand: int = 0

    def total_cost(self) -> float:
        """Return the unit price multiplied by the quantity on hand."""
        price, quantity = self.unit_price, self.quantity_on_hand
        return price * quantity

# quantity_on_hand is omitted, so it takes its default of 0.
a = InventoryItem(name='a', unit_price=2)
a

InventoryItem(name='a', unit_price=2, quantity_on_hand=0)

In [108]:
from dataclasses import field
@dataclass
class C:
    """Dataclass whose field ``c`` is derived from ``a`` and ``b`` after construction."""
    a: float
    b: float
    # Excluded from the generated __init__; filled in by __post_init__.
    # Uses the imported `field` name: the `dataclasses` module itself is never imported.
    c: float = field(init=False)

    def __post_init__(self):
        """Set ``c`` to the sum of ``a`` and ``b``."""
        self.c = self.a + self.b
    
# Only a and b are passed in; c is filled in by __post_init__.
c = C(a=1, b=2)
c

C(a=1, b=2, c=3)

In [106]:
from dataclasses import asdict

@dataclass
class Point:
    """A point described by two integer fields, ``x`` and ``y``."""
    x: int
    y: int

p = Point(x=10, y=20)
# asdict() turns the dataclass instance into a plain dict keyed by field name.
assert asdict(p) == {'x': 10, 'y': 20}


In [137]:
import collections
# collections.namedtuple is a factory function: it builds a tuple subclass
# that has named fields and a class name of its own.
Card = collections.namedtuple('Card', ('rank', 'suit'))

class FrenchDeck:
    """A sequence-like deck holding one Card per (suit, rank) combination."""

    # Ranks '2' through '10' followed by the four letter ranks.
    ranks = list(map(str, range(2, 11))) + list('JQKA')
    suits = 'spades diamonds clubs hearts'.split()

    def __init__(self):
        # Suits form the outer loop, so cards are grouped by suit, then ordered by rank.
        deck = []
        for suit in self.suits:
            for rank in self.ranks:
                deck.append(Card(rank, suit))
        self._cards = deck

    def __len__(self):
        """Return the number of cards in the deck."""
        card_count = len(self._cards)
        return card_count

    def __getitem__(self, position):
        """Return the card (or slice of cards) at ``position``."""
        cards = self._cards
        return cards[position]
        
a = FrenchDeck()

# Index the deck through its public __getitem__ rather than the private _cards list;
# the first card is the same either way.
a[0]

Card(rank='2', suit='spades')