# Data Class Builders

> Data classes are like children. They are okay as a starting point, but to participate as a grownup object, they need to take some responsibility.
>> Martin Fowler and Kent Beck

## Overview of Data Class Builders

In [1]:
class Coordinate:
    """Bare-bones latitude/longitude holder.

    Only ``__init__`` is defined, so instances keep the ``__repr__`` and the
    identity-based ``__eq__`` inherited from ``object``.
    """

    def __init__(self, lat, lon):
        """Store ``lat`` and ``lon`` as instance attributes."""
        self.lat, self.lon = lat, lon

In [2]:
# The class defines no __repr__, so the default object repr is displayed.
moscow = Coordinate(55.76, 37.62)
moscow

<__main__.Coordinate at 0x7fbae8205dd0>

In [3]:
# Same field values as ``moscow``, but without a custom __eq__ the comparison
# is by identity, so two distinct instances are not equal.
location = Coordinate(55.76, 37.62)
location == moscow

False

In [4]:
# Comparing the attributes explicitly shows the two objects hold equal data.
(location.lat, location.lon) == (moscow.lat, moscow.lon)

True

In [5]:
from collections import namedtuple
# Rebind ``Coordinate`` to a class generated by namedtuple; it subclasses tuple.
Coordinate = namedtuple('Coordinate', 'lat lon')
issubclass(Coordinate, tuple)

True

In [6]:
# The generated __repr__ shows the class name with field names and values.
moscow = Coordinate(55.756, 37.617)
moscow

Coordinate(lat=55.756, lon=37.617)

In [7]:
# Instances compare by value; fields may also be passed as keyword arguments.
moscow == Coordinate(lat=55.756, lon=37.617)

True

In [8]:
import typing
# Functional form of typing.NamedTuple: fields are passed as (name, type) pairs.
Coordinate = typing.NamedTuple('Coordinate',
                               [('lat', float), ('lon', float)]
)
issubclass(Coordinate, tuple)

True

In [9]:
# The declared field types are retrievable as type hints.
typing.get_type_hints(Coordinate)

{'lat': float, 'lon': float}

In [12]:
from typing import NamedTuple

class Coordinate(NamedTuple):
    """Latitude/longitude pair that prints as degrees with hemisphere letters."""

    lat: float
    lon: float

    def __str__(self):
        """Return e.g. ``'55.8°N, 37.6°E'``: absolute values to one decimal plus N/S and E/W."""
        lat, lon = self
        lat_hemisphere = 'N' if lat >= 0 else 'S'
        lon_hemisphere = 'E' if lon >= 0 else 'W'
        return f'{abs(lat):.1f}°{lat_hemisphere}, {abs(lon):.1f}°{lon_hemisphere}'

In [18]:
# A class written with the typing.NamedTuple syntax is still a tuple subclass.
issubclass(Coordinate, tuple)

True

In [19]:
from dataclasses import dataclass

@dataclass(frozen=True)
class Coordinate:
    """Immutable latitude/longitude pair built with ``@dataclass(frozen=True)``."""

    lat: float
    lon: float

    def __str__(self):
        """Return e.g. ``'55.8°N, 37.6°E'``: absolute values to one decimal plus N/S and E/W."""
        lat_letter = 'N' if self.lat >= 0 else 'S'
        lon_letter = 'E' if self.lon >= 0 else 'W'
        lat_part = f'{abs(self.lat):.1f}°{lat_letter}'
        lon_part = f'{abs(self.lon):.1f}°{lon_letter}'
        return f'{lat_part}, {lon_part}'

## Classic Named Tuples

The `collections.namedtuple` function is a factory that builds subclasses of `tuple` enhanced with field names, a class name, and an informative `__repr__`. Classes built with `namedtuple` can be used anywhere tuples are needed, and in fact many functions of the Python standard library that are used to return tuples now return named tuples for convenience, without affecting the user’s code at all.

In [20]:
from collections import namedtuple

# Field names are given as one space-separated string; values are passed
# positionally in the same order.
City = namedtuple('City', 'name country population coordinates')
tokyo = City('Tokyo', 'JP', 36.933, (35.689722, 139.691667))
tokyo

City(name='Tokyo', country='JP', population=36.933, coordinates=(35.689722, 139.691667))

In [21]:
# Fields are readable by name...
tokyo.population

36.933

In [22]:
# ...including a field that holds a nested tuple...
tokyo.coordinates

(35.689722, 139.691667)

In [23]:
# ...and by position, like any tuple.
tokyo[1]

'JP'

In [24]:
# _fields is a tuple with the field names of the class.
City._fields

('name', 'country', 'population', 'coordinates')

In [25]:
# Rebuild Coordinate as a two-field namedtuple, create a City from an iterable
# with _make(), then convert the instance to a dict with _asdict().
Coordinate = namedtuple('Coordinate', 'lat lon')
delhi_data = ('Delhi NCR', 'IN', 21.935, Coordinate(28.613889, 77.208889))
delhi = City._make(delhi_data)
delhi._asdict()

{'name': 'Delhi NCR',
 'country': 'IN',
 'population': 21.935,
 'coordinates': Coordinate(lat=28.613889, lon=77.208889)}

In [26]:
import json
# The nested Coordinate is serialized as a plain JSON array; its field names
# do not appear in the output.
json.dumps(delhi._asdict())

'{"name": "Delhi NCR", "country": "IN", "population": 21.935, "coordinates": [28.613889, 77.208889]}'

In [27]:
# One default for three fields: it applies to the rightmost field, ``reference``,
# so only ``lat`` and ``lon`` must be supplied.
Coordinate = namedtuple('Coordinate', 'lat lon reference', defaults=['WGS84'])
Coordinate(0, 0)

Coordinate(lat=0, lon=0, reference='WGS84')

In [28]:
# Mapping of field name to default value, for the fields that have one.
Coordinate._field_defaults

{'reference': 'WGS84'}

## Typed Named Tuples

The `Coordinate` class with a default field from before can be written using `typing.NamedTuple` as shown:

In [29]:
from typing import NamedTuple

class Coordinate(NamedTuple):
    """Latitude/longitude pair with a ``reference`` field that defaults to ``'WGS84'``."""
    lat: float
    lon: float
    # A field with a default is declared as an annotated assignment.
    reference: str = 'WGS84'

### No Runtime Effect

Think about Python type hints as “documentation that can be verified by IDEs and type checkers.”

In [30]:
import typing

class Coordinate(typing.NamedTuple):
    """Latitude/longitude pair; both fields are annotated as ``float``."""
    lat: float
    lon: float

# The annotations are not checked at runtime: a str and None are accepted
# for fields annotated as float.
trash = Coordinate('Ni!', None)
print(trash)

Coordinate(lat='Ni!', lon=None)


In [31]:
class DemoPlainClass:
    a: int           # annotation only: listed in __annotations__, no class attribute is created
    b: float = 1.1   # annotation and value: listed in __annotations__ and bound as a class attribute
    c = 'spam'       # no annotation: ordinary class attribute, absent from __annotations__

In [32]:
# Only the annotated names ``a`` and ``b`` are recorded; ``c`` is not.
DemoPlainClass.__annotations__

{'a': int, 'b': float}

In [37]:
# ``a`` is annotated but never assigned, so the class has no such attribute
# and the lookup raises; the exception is caught and displayed.
try:
    print(f"{DemoPlainClass.a=}")
except Exception as e:
    print(f"{e=}")

e=AttributeError("type object 'DemoPlainClass' has no attribute 'a'")


In [36]:
# ``b`` was assigned a value in the class body, so it is a class attribute.
try:
    print(f"{DemoPlainClass.b=}")
except Exception as e:
    print(f"{e=}")

DemoPlainClass.b=1.1


In [38]:
# ``c`` is an ordinary class attribute without an annotation.
try:
    print(f"{DemoPlainClass.c=}")
except Exception as e:
    print(f"{e=}")

DemoPlainClass.c='spam'


In [39]:
import typing

class DemoNTClass(typing.NamedTuple):
    a: int           # required field, tuple position 0
    b: float = 1.1   # field with a default value, tuple position 1
    c = 'spam'       # no annotation: plain class attribute, not a field

In [40]:
# Same result as for the plain class: only ``a`` and ``b`` are annotated.
DemoNTClass.__annotations__

{'a': int, 'b': float}

In [41]:
# On the class, field ``a`` is a getter object for tuple position 0.
DemoNTClass.a

_tuplegetter(0, 'Alias for field number 0')

In [42]:
# Field ``b`` is likewise a getter object, for tuple position 1.
DemoNTClass.b

_tuplegetter(1, 'Alias for field number 1')

In [43]:
# ``c`` is not a field; it is stored on the class as the plain string.
DemoNTClass.c

'spam'

In [44]:
# The generated docstring lists only the fields ``a`` and ``b``.
DemoNTClass.__doc__

'DemoNTClass(a, b)'

In [45]:
# Only ``a`` has to be supplied; ``b`` falls back to its default.
nt = DemoNTClass(8)
nt.a

8

In [46]:
# Default value of field ``b``.
nt.b

1.1

In [47]:
# The class attribute ``c`` is also reachable through the instance.
nt.c

'spam'

In [50]:
# Assigning to a field of a NamedTuple instance raises AttributeError.
try:
    nt.a += 1
except Exception as e:
    print(f"{e=}")

e=AttributeError("can't set attribute")


In [51]:
# The same holds for a field that has a default value.
try:
    nt.b += 1
except Exception as e:
    print(f"{e=}")

e=AttributeError("can't set attribute")


In [53]:
# ``c`` is not a field, yet assigning it through the instance fails as well:
# the attribute is reported as read-only.
try:
    nt.c += "1"
except Exception as e:
    print(f"{e=}")

e=AttributeError("'DemoNTClass' object attribute 'c' is read-only")


In [54]:
from dataclasses import dataclass

@dataclass
class DemoDataClass:
    a: int           # field without a default: required by the generated __init__
    b: float = 1.1   # field with a default value
    c = 'spam'       # no annotation: plain class attribute, not a dataclass field

In [55]:
# Only the annotated names ``a`` and ``b`` are recorded.
DemoDataClass.__annotations__

{'a': int, 'b': float}

In [56]:
# The generated docstring shows the __init__ signature: fields ``a`` and ``b`` only.
DemoDataClass.__doc__

'DemoDataClass(a: int, b: float = 1.1)'

In [58]:
# ``a`` has no default, so no class attribute named ``a`` exists.
try:
    print(f"{DemoDataClass.a=}")
except Exception as e:
    print(f"{e=}")

e=AttributeError("type object 'DemoDataClass' has no attribute 'a'")


In [59]:
# The default of ``b`` is available as a class attribute.
try:
    print(f"{DemoDataClass.b=}")
except Exception as e:
    print(f"{e=}")

DemoDataClass.b=1.1


In [60]:
# ``c`` is a plain class attribute.
try:
    print(f"{DemoDataClass.c=}")
except Exception as e:
    print(f"{e=}")

DemoDataClass.c='spam'


In [61]:
# Only ``a`` must be passed to the generated __init__.
dc = DemoDataClass(9)
dc.a

9

In [62]:
# ``b`` took its default value.
dc.b

1.1

In [63]:
# ``c`` is read from the class through the instance.
dc.c

'spam'

In [64]:
# Instances of a dataclass declared without frozen=True accept assignment.
dc.a = 10

In [65]:
# No runtime type check: a str is accepted for a field annotated as float.
dc.b = 'oops'

In [67]:
# Both assignments create instance attributes: ``c`` now shadows the class
# attribute on this instance, and ``z`` was never declared in the class at all.
dc.c = 'whatever'
dc.z = 'secret stash'

## More About @dataclass

### Field Options

In [70]:
from dataclasses import dataclass, field

@dataclass
class ClubMember:
    """Club member with a name, a list of guests, and an athlete flag."""
    name: str
    # default_factory=list builds a new list for each instance instead of
    # sharing one mutable default between instances.
    guests: list[str] = field(default_factory=list)
    # repr=False leaves ``athlete`` out of the generated __repr__.
    athlete: bool = field(default=False, repr=False)

### Post-init Processing

The `__init__` method generated by `@dataclass` only takes the arguments passed and assigns them—or their default values, if missing—to the instance attributes that are instance fields. But you may need to do more than that to initialize the instance. If that’s the case, you can provide a `__post_init__` method. When that method exists, `@dataclass` will add code to the generated `__init__` to call `__post_init__` as the last step.

In [72]:
"""
``HackerClubMember`` objects accept an optional ``handle`` argument::

     >>> anna = HackerClubMember('Anna Ravenscroft', handle='AnnaRaven')
     >>> anna
     HackerClubMember(name='Anna Ravenscroft', guests=[], handle='AnnaRaven')

If ``handle`` is omitted, it's set to the first part of the member's name::

    >>> leo = HackerClubMember('Leo Rochael')
    >>> leo
    HackerClubMember(name='Leo Rochael', guests=[], handle='Leo')
    
Members must have a unique handle. The following ``leo2`` will not be created,
because its ``handle`` would be 'Leo', which was taken by ``leo``::

    >>> leo2 = HackerClubMember('Leo DaVinci')
    Traceback (most recent call last):
    ValueError: handle 'Leo' already exists.
    
To fix, ``leo2`` must be created with an explicit ``handle``::

    >>> leo2 = HackerClubMember('Leo DaVinci', handle='Neo')
    >>> leo2
    HackerClubMember(name='Leo DaVinci', guests=[], handle='Neo')
"""

from dataclasses import dataclass

@dataclass
class HackerClubMember(ClubMember):
    """Club member identified by a ``handle`` that must be unique across instances.

    Raises:
        ValueError: if the resulting handle is already registered.
        IndexError: if ``handle`` is omitted and ``name`` contains no
            non-whitespace text to derive a handle from.
    """
    # No annotation, so @dataclass does not treat this as a field: it is a
    # single set stored on the class, holding every handle registered so far.
    all_handles = set()
    handle: str = ''

    def __post_init__(self):
        """Derive a missing handle from the name, then register it as taken."""
        cls = self.__class__
        if self.handle == '':
            # Default handle: the first whitespace-separated word of the name.
            self.handle = self.name.split()[0]
        if self.handle in cls.all_handles:
            # !r quotes the handle, producing "handle 'Leo' already exists."
            msg = f'handle {self.handle!r} already exists.'
            raise ValueError(msg)
        cls.all_handles.add(self.handle)

### @dataclass Example: Dublin Core Resource Record

In [86]:
import datetime
from dataclasses import dataclass, field, fields
from datetime import date
from enum import Enum, auto
from typing import Optional

class ResourceType(Enum):
    """Kinds of media resource; ``auto()`` numbers the members 1, 2, 3."""
    BOOK = auto()
    EBOOK = auto()
    VIDEO = auto()

@dataclass
class Resource:
    """Media resource description."""
    identifier: str
    title: str = '<untitled>'
    creators: list[str] = field(default_factory=list)
    # The type is spelled ``datetime.date`` on purpose. An annotated assignment
    # binds the value before it evaluates the annotation, so inside this class
    # body the bare name ``date`` already refers to the default (None) by the
    # time the annotation is read, and ``Optional[date]`` would be recorded as
    # ``Optional[None]``.
    date: Optional[datetime.date] = None
    type: ResourceType = ResourceType.BOOK
    description: str = ''
    language: str = ''
    subjects: list[str] = field(default_factory=list)

    def __repr__(self):
        """Return a multi-line repr with one ``name = value,`` line per field."""
        cls = self.__class__
        indent = ' ' * 4
        lines = [f'{cls.__name__}(']
        lines.extend(
            f'{indent}{f.name} = {getattr(self, f.name)!r},' for f in fields(cls)
        )
        lines.append(')')
        return '\n'.join(lines)

In [87]:
# All eight fields are passed positionally, in the order they are declared
# in Resource; the custom __repr__ then prints one field per line.
description = 'Improving the design of existing code'
book = Resource('978-0-13-475759-9', 'Refactoring, 2nd Edition',
                ['Martin Fowler', 'Kent Beck'], date(2018, 11, 19),
                ResourceType.BOOK, description, 'EN',
                ['computer programming', 'OOP']
                )
book

Resource(
    identifier = '978-0-13-475759-9',
    title = 'Refactoring, 2nd Edition',
    creators = ['Martin Fowler', 'Kent Beck'],
    date = datetime.date(2018, 11, 19),
    type = <ResourceType.BOOK: 1>,
    description = 'Improving the design of existing code',
    language = 'EN',
    subjects = ['computer programming', 'OOP'],
)

## Pattern Matching Class Instances

Class patterns are designed to match class instances by type and—optionally—by attributes. The subject of a class pattern can be any class instance, not only instances of data classes.

### Keyword Class Patterns

In [88]:
import typing

class City(typing.NamedTuple):
    """City record with three string fields: continent, name, and country."""
    continent: str
    name: str
    country: str

# Sample data shared by the pattern-matching functions below.
cities = [
     City('Asia', 'Tokyo', 'JP'),
     City('Asia', 'Delhi', 'IN'),
     City('North America', 'Mexico City', 'MX'),
     City('North America', 'New York', 'US'),
     City('South America', 'São Paulo', 'BR'),
]

def match_asian_cities():
    results = []
    for city in cities:
        match city:
            case City(continent='Asia'):
                results.append(city)
    return results

In [89]:
# Only the two entries whose continent is 'Asia' are returned.
match_asian_cities()

[City(continent='Asia', name='Tokyo', country='JP'),
 City(continent='Asia', name='Delhi', country='IN')]

In [92]:
def match_asian_countries():
    results = []
    for city in cities:
        match city:
            case City(continent='Asia', country=cc):
                results.append(cc)
    return results

In [93]:
# The captured ``country`` values of the Asian cities, in list order.
match_asian_countries()

['JP', 'IN']

### Positional Class Patterns

In [94]:
def match_asian_cities_pos():
    results = []
    for city in cities:
        match city:
            case City('Asia'):
                results.append(city)
    return results

In [95]:
# Same result as the keyword-pattern version.
match_asian_cities_pos()

[City(continent='Asia', name='Tokyo', country='JP'),
 City(continent='Asia', name='Delhi', country='IN')]

In [96]:
def match_asian_countries_pos():
    results = []
    for city in cities:
        match city:
            case City('Asia', _, country):
                results.append(country)
    return results

In [97]:
# Same result as the keyword-pattern version.
match_asian_countries_pos()

['JP', 'IN']

In [99]:
# __match_args__ lists the attribute names in the order positional patterns use them.
City.__match_args__

('continent', 'name', 'country')