# Data classes

In this section we will see Python features to avoid boilerplate when creating classes that are essentially collections of fields, similar to a C struct or a database record.

* ``collections.namedtuple``
* ``typing.NamedTuple``
* ``dataclasses.dataclass``

## collections.namedtuple

In [1]:
from collections import namedtuple

Coordinate = namedtuple('Coordinate', 'lat long')
Coordinate.__mro__

(__main__.Coordinate, tuple, object)

In [2]:
cle = Coordinate(41.40, -81.85)
cle

Coordinate(lat=41.4, long=-81.85)

A namedtuple is simple to use and is also a tuple, so you can unpack it:

In [3]:
latitude, longitude = cle
latitude

41.4

In [4]:
longitude

-81.85

Includes ``__eq__`` that knows how to compare with tuples:

In [5]:
(latitude, longitude) == cle

True

## namedtuple limitations

* instances are immutable;
* no simple way to implement custom methods.

## typing.NamedTuple

Introduced in Python 3.5; support for the [PEP 526](https://www.python.org/dev/peps/pep-0526) variable annotation syntax, used in the class below, was added in Python 3.6.

In [6]:
from typing import NamedTuple

class Coordinate(NamedTuple):
    # Geographic position stored as a (lat, long) tuple, in degrees.
    # Described with comments rather than a class docstring so that the
    # automatically generated ``__doc__`` is left untouched.

    # Both fields default to 0, so ``Coordinate()`` is the point (0, 0).
    lat: float = 0
    long: float = 0

    # Unannotated, so this is a plain class attribute, not a tuple field.
    reference_system = 'WGS84'

    def __str__(self):
        """Return the position as absolute degrees plus hemisphere letters.

        Each value is shown with one decimal place, e.g. ``41.4°N, 81.8°W``.
        A value of zero is labelled north (latitude) or east (longitude).
        """
        if self.lat >= 0:
            lat_hemisphere = 'N'
        else:
            lat_hemisphere = 'S'
        if self.long >= 0:
            long_hemisphere = 'E'
        else:
            long_hemisphere = 'W'
        lat_text = f'{abs(self.lat):.1f}°{lat_hemisphere}'
        long_text = f'{abs(self.long):.1f}°{long_hemisphere}'
        return ', '.join((lat_text, long_text))

In [7]:
gulf_of_guinea = Coordinate()
g = gulf_of_guinea
g

Coordinate(lat=0, long=0)

In [8]:
dir(gulf_of_guinea)

['__add__',
 '__annotations__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getnewargs__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__mul__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmul__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '_asdict',
 '_field_defaults',
 '_field_types',
 '_fields',
 '_fields_defaults',
 '_make',
 '_replace',
 'count',
 'index',
 'lat',
 'long',
 'reference_system']

In [9]:
from collections import namedtuple
from typing import NamedTuple
# Plain empty tuple, used as the baseline when comparing attribute sets.
t = tuple()
# Bind each generated class to its own name and create the instances
# separately, so ``Coordinate1``/``Coordinate2`` refer to the classes
# and ``c1``/``c2`` to one instance of each.
Coordinate1 = namedtuple('Coordinate1', 'lat long')
Coordinate2 = NamedTuple('Coordinate2', [('lat', float), ('long', float)])
c1 = Coordinate1(1, 2)
c2 = Coordinate2(1, 2)

In [10]:
set(dir(c1)) - set(dir(t))

{'__module__',
 '__slots__',
 '_asdict',
 '_field_defaults',
 '_fields',
 '_fields_defaults',
 '_make',
 '_replace',
 'lat',
 'long'}

In [11]:
c2._field_types is c2.__annotations__

True

In [12]:
Coordinate.__dict__

mappingproxy({'__doc__': 'Coordinate(lat, long)',
              '__slots__': (),
              '_fields': ('lat', 'long'),
              '_field_defaults': {'lat': 0, 'long': 0},
              '_fields_defaults': {},
              '__new__': <staticmethod at 0x1096c5ee0>,
              '_make': <classmethod at 0x1096c5fd0>,
              '_replace': <function collections.Coordinate._replace(self, /, **kwds)>,
              '__repr__': <function collections.Coordinate.__repr__(self)>,
              '_asdict': <function collections.Coordinate._asdict(self)>,
              '__getnewargs__': <function collections.Coordinate.__getnewargs__(self)>,
              'lat': <_collections._tuplegetter at 0x1096c5d90>,
              'long': <_collections._tuplegetter at 0x1096c5e80>,
              '__module__': '__main__',
              '__annotations__': {'lat': float, 'long': float},
              '_field_types': {'lat': float, 'long': float},
              'reference_system': 'WGS84',
              '__str__': <function __main__.Coordinate.__str__(self)>})

In [13]:
for k, v in Coordinate.__dict__.items():
    if not k.startswith('_'):
        print(k,':', v)

lat : <_collections._tuplegetter object at 0x1096c5d90>
long : <_collections._tuplegetter object at 0x1096c5e80>
reference_system : WGS84


In [14]:
cle = Coordinate(41.40, -81.85)
print(cle)

41.4°N, 81.8°W


In [15]:
try:
    cle.lat = 0
except AttributeError as e:
    print(e)

can't set attribute


In [16]:
cle.reference_system

'WGS84'

In [17]:
try:
    cle.reference_system = 'X'
except AttributeError as e:
    print(e)

'Coordinate' object attribute 'reference_system' is read-only


## @dataclass

### Coordinate as dataclass

In [18]:
from dataclasses import dataclass

from typing import ClassVar

@dataclass(frozen=True)
class Coordinate:
    """Frozen geographic position, in degrees.

    ``lat`` is required; ``long`` defaults to 0.
    """
    lat: float
    long: float = 0

    # ClassVar marks this as a class-level attribute, not an instance field.
    reference_system: ClassVar[str] = 'WGS84'

    def __str__(self):
        """Return the position as absolute degrees plus hemisphere letters.

        Each value is shown with one decimal place, e.g. ``41.4°N, 81.8°W``.
        Negative latitude is labelled S and negative longitude W.
        """
        lat_text = f'{abs(self.lat):.1f}°' + ('S' if self.lat < 0 else 'N')
        long_text = f'{abs(self.long):.1f}°' + ('W' if self.long < 0 else 'E')
        return f'{lat_text}, {long_text}'

In [19]:
for k, v in Coordinate.__dict__.items():
    if not k.startswith('_'):
        print(k,':', v)

long : 0
reference_system : WGS84


In [20]:
cle = Coordinate(41.40, -81.85)
cle

Coordinate(lat=41.4, long=-81.85)

In [21]:
print(cle)

41.4°N, 81.8°W


In [22]:
import dataclasses

try:
    cle.lat = 0.0
except dataclasses.FrozenInstanceError as exc:
    print(repr(exc))

FrozenInstanceError("cannot assign to field 'lat'")


### @dataclass options

```
@dataclasses.dataclass(*, 
    init=True, repr=True, eq=True, order=False, unsafe_hash=False, frozen=False)
```

<table>
<tr><th>option</th><th>default</th><th style="text-align: left;">meaning</th></tr>
<tr><td>init</td><td>True</td>
    <td style="text-align: left;">generate <code>__init__</code>¹</td></tr>
<tr><td>repr</td><td>True</td>
    <td style="text-align: left;">generate <code>__repr__</code>¹</td></tr>
<tr><td>eq</td><td>True</td>
    <td style="text-align: left;">generate <code>__eq__</code>¹</td></tr>
<tr><td>order</td><td>False</td>
    <td style="text-align: left;">generate <code>__lt__</code>, <code>__le__</code>, <code>__gt__</code>, <code>__ge__</code>²</td></tr>
<tr><td>unsafe_hash</td><td>False</td>
    <td style="text-align: left;">generate <code>__hash__</code>³</td></tr>
<tr><td>frozen</td><td>False</td>
    <td style="text-align: left;">make instances "immutable" ⁴</td></tr>
</table>

**Notes**

¹ Ignored if the special method is implemented by the user.<br>
² Raises an exception if ``eq=False`` or if any of the listed special methods is implemented by the user.<br>
³ Complex semantics and several caveats — see: [dataclass documentation](https://docs.python.org/3/library/dataclasses.html#dataclasses.dataclass).<br>
⁴ Not really immutable — immutability is emulated by generating ``__setattr__`` and ``__delattr__`` methods that raise ``dataclasses.FrozenInstanceError`` (a subclass of ``AttributeError``).

### Example: a Dublin Core resource dataclass

In [23]:
from dataclasses import dataclass, field, fields
from typing import List

@dataclass
class Resource:
    """Media resource description.

    Every field has a default, so ``Resource()`` builds an empty record.
    """
    # Scalar text fields; the identifier placeholder is thirteen zeros.
    identifier: str = "0000000000000"
    title: str = "<untitled>"
    # List fields use default_factory so each instance gets its own list.
    creators: List[str] = field(default_factory=list)
    date: str = ""
    type: str = ""
    description: str = ""
    language: str = ""
    subjects: List[str] = field(default_factory=list)


In [24]:
description = 'A hands-on guide to idiomatic Python code.'
book = Resource('9781491946008', 'Fluent Python', 
    ['Luciano Ramalho'], '2015-08-20', 'book', description,
    'EN', ['computer programming', 'Python'])
book

Resource(identifier='9781491946008', title='Fluent Python', creators=['Luciano Ramalho'], date='2015-08-20', type='book', description='A hands-on guide to idiomatic Python code.', language='EN', subjects=['computer programming', 'Python'])

### Resource with custom \_\_repr\_\_

In [25]:
from dataclasses import dataclass, field, fields
from typing import List

@dataclass
class Resource:
    """Media resource description.

    Every field has a default, so ``Resource()`` builds an empty record.
    The custom ``__repr__`` lists one field per line.
    """
    # Scalar text fields; the identifier placeholder is thirteen zeros.
    identifier: str = "0000000000000"
    title: str = "<untitled>"
    # List fields use default_factory so each instance gets its own list.
    creators: List[str] = field(default_factory=list)
    date: str = ""
    type: str = ""
    description: str = ""
    language: str = ""
    subjects: List[str] = field(default_factory=list)

    def __repr__(self):
        """Return a multi-line repr with one ``name = value,`` line per field.

        The class name opens the call, every dataclass field follows on its
        own four-space-indented line with its value rendered by ``repr()``,
        and a closing parenthesis ends the text.
        """
        cls = type(self)
        lines = [f'{cls.__name__}(']
        lines.extend(
            f'    {spec.name} = {getattr(self, spec.name)!r},'
            for spec in fields(cls)
        )
        lines.append(')')
        return '\n'.join(lines)

In [26]:
description = 'A hands-on guide to idiomatic Python code.'
book = Resource('9781491946008', 'Fluent Python', 
    ['Luciano Ramalho'], '2015-08-20', 'book', description,
    'EN', ['computer programming', 'Python'])
book

Resource(
    identifier = '9781491946008',
    title = 'Fluent Python',
    creators = ['Luciano Ramalho'],
    date = '2015-08-20',
    type = 'book',
    description = 'A hands-on guide to idiomatic Python code.',
    language = 'EN',
    subjects = ['computer programming', 'Python'],
)

In [27]:
book2 = eval(repr(book))

In [28]:
book2 == book

True

In [29]:
empty = Resource()
empty

Resource(
    identifier = '0000000000000',
    title = '<untitled>',
    creators = [],
    date = '',
    type = '',
    description = '',
    language = '',
    subjects = [],
)

### See docs for the field function

In [30]:
field?

<img src="img/thoughtworks.png" width="300" title="ThoughtWorks, Inc. logo">