Source: [Real Python: Data Classes](https://realpython.com/python-data-classes/)

In [1]:
from dataclasses import dataclass, field, fields
from typing import Any, List, Tuple
from math import asin, cos, radians, sin, sqrt
from random import sample
from timeit import timeit

# Intro + Basic Dataclasses

In [2]:
@dataclass
class DataClassCard:
    rank: str
    suit: str

queen_of_hearts = DataClassCard('Q', 'Hearts')

## Printing class

In [3]:
queen_of_hearts.rank

'Q'

## Compare dataclasses

In [4]:
queen_of_hearts == DataClassCard('Q', 'Hearts')

True

# Default values

you can use default values
It's probably better to create a class method for this printed text

In [5]:
@dataclass
class Position:
    name: str
    lon: float = 0.0
    lat: float = 0.0

pos = Position('Oslo', 59.9)
print(f'{pos.name} is at {pos.lat}°N, {pos.lon}°E')

Oslo is at 0.0°N, 59.9°E


# Type hints

In [6]:
@dataclass
class WithoutExplicitTypes:
    name: Any
    value: Any = 42

Position(3.14, 'pi day', 2018)

Position(name=3.14, lon='pi day', lat=2018)

Apparently, the data types are not enforced? It's purely cosmetic?

In [7]:
Position(3.14, 'pi day', 2018)

Position(name=3.14, lon='pi day', lat=2018)

# Adding methods
You can add methods, just like with a regular class

In [8]:
@dataclass
class Position:
    name: str
    lon: float = 0.0
    lat: float = 0.0

    def distance_to(self, other):
        r = 6371  # Earth radius in kilometers
        lam_1, lam_2 = radians(self.lon), radians(other.lon)
        phi_1, phi_2 = radians(self.lat), radians(other.lat)
        h = (sin((phi_2 - phi_1) / 2)**2
             + cos(phi_1) * cos(phi_2) * sin((lam_2 - lam_1) / 2)**2)
        return 2 * r * asin(sqrt(h))

oslo = Position('Oslo', 10.8, 59.9)
vancouver = Position('Vancouver', -123.1, 49.3)
oslo.distance_to(vancouver)

7181.7841229421165

# More Flexible Data Classes

A dataclass that contains dataclass objects in  a list

In [9]:
@dataclass
class PlayingCard:
    rank: str
    suit: str

@dataclass
class Deck:
    cards: List[PlayingCard]

queen_of_hearts = PlayingCard('Q', 'Hearts')
ace_of_spades = PlayingCard('A', 'Spades')
two_cards = Deck([queen_of_hearts, ace_of_spades])
two_cards

Deck(cards=[PlayingCard(rank='Q', suit='Hearts'), PlayingCard(rank='A', suit='Spades')])

## Advanced default values

I you want to assign a list, you need to use default_factory.

In [10]:
RANKS = '2 3 4 5 6 7 8 9 10 J Q K A'.split()
SUITS = '♣ ♢ ♡ ♠'.split()

def make_french_deck():
    return [PlayingCard(r, s) for s in SUITS for r in RANKS]

@dataclass
class Deck:
    cards: List[PlayingCard] = field(default_factory=make_french_deck)

Deck()

Deck(cards=[PlayingCard(rank='2', suit='♣'), PlayingCard(rank='3', suit='♣'), PlayingCard(rank='4', suit='♣'), PlayingCard(rank='5', suit='♣'), PlayingCard(rank='6', suit='♣'), PlayingCard(rank='7', suit='♣'), PlayingCard(rank='8', suit='♣'), PlayingCard(rank='9', suit='♣'), PlayingCard(rank='10', suit='♣'), PlayingCard(rank='J', suit='♣'), PlayingCard(rank='Q', suit='♣'), PlayingCard(rank='K', suit='♣'), PlayingCard(rank='A', suit='♣'), PlayingCard(rank='2', suit='♢'), PlayingCard(rank='3', suit='♢'), PlayingCard(rank='4', suit='♢'), PlayingCard(rank='5', suit='♢'), PlayingCard(rank='6', suit='♢'), PlayingCard(rank='7', suit='♢'), PlayingCard(rank='8', suit='♢'), PlayingCard(rank='9', suit='♢'), PlayingCard(rank='10', suit='♢'), PlayingCard(rank='J', suit='♢'), PlayingCard(rank='Q', suit='♢'), PlayingCard(rank='K', suit='♢'), PlayingCard(rank='A', suit='♢'), PlayingCard(rank='2', suit='♡'), PlayingCard(rank='3', suit='♡'), PlayingCard(rank='4', suit='♡'), PlayingCard(rank='5', suit='♡

You can add metadata like this:

In [11]:
@dataclass
class Position:
    name: str
    lon: float = field(default=0.0, metadata={'unit': 'degrees'})
    lat: float = field(default=0.0, metadata={'unit': 'degrees'})

fields(Position)

(Field(name='name',type=<class 'str'>,default=<dataclasses._MISSING_TYPE object at 0x75621ab42f00>,default_factory=<dataclasses._MISSING_TYPE object at 0x75621ab42f00>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),kw_only=False,_field_type=_FIELD),
 Field(name='lon',type=<class 'float'>,default=0.0,default_factory=<dataclasses._MISSING_TYPE object at 0x75621ab42f00>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'unit': 'degrees'}),kw_only=False,_field_type=_FIELD),
 Field(name='lat',type=<class 'float'>,default=0.0,default_factory=<dataclasses._MISSING_TYPE object at 0x75621ab42f00>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({'unit': 'degrees'}),kw_only=False,_field_type=_FIELD))

And retrieve metadata like this:

In [12]:
print(fields(Position)[0].metadata)
print(fields(Position)[1].metadata)
print(fields(Position)[2].metadata['unit'])

{}
{'unit': 'degrees'}
degrees


## You Need Representation?

Let's look at the original

In [13]:
Deck()

Deck(cards=[PlayingCard(rank='2', suit='♣'), PlayingCard(rank='3', suit='♣'), PlayingCard(rank='4', suit='♣'), PlayingCard(rank='5', suit='♣'), PlayingCard(rank='6', suit='♣'), PlayingCard(rank='7', suit='♣'), PlayingCard(rank='8', suit='♣'), PlayingCard(rank='9', suit='♣'), PlayingCard(rank='10', suit='♣'), PlayingCard(rank='J', suit='♣'), PlayingCard(rank='Q', suit='♣'), PlayingCard(rank='K', suit='♣'), PlayingCard(rank='A', suit='♣'), PlayingCard(rank='2', suit='♢'), PlayingCard(rank='3', suit='♢'), PlayingCard(rank='4', suit='♢'), PlayingCard(rank='5', suit='♢'), PlayingCard(rank='6', suit='♢'), PlayingCard(rank='7', suit='♢'), PlayingCard(rank='8', suit='♢'), PlayingCard(rank='9', suit='♢'), PlayingCard(rank='10', suit='♢'), PlayingCard(rank='J', suit='♢'), PlayingCard(rank='Q', suit='♢'), PlayingCard(rank='K', suit='♢'), PlayingCard(rank='A', suit='♢'), PlayingCard(rank='2', suit='♡'), PlayingCard(rank='3', suit='♡'), PlayingCard(rank='4', suit='♡'), PlayingCard(rank='5', suit='♡

Let's change PlayingCard to make the string repr metter

In [14]:
from dataclasses import dataclass

@dataclass
class PlayingCard:
    rank: str
    suit: str

    def __str__(self):
        return f'{self.suit}{self.rank}'

Nicer

In [15]:
print(PlayingCard('A', '♠'))

♠A


But this one is still very verbose

In [16]:
print(Deck())

Deck(cards=[PlayingCard(rank='2', suit='♣'), PlayingCard(rank='3', suit='♣'), PlayingCard(rank='4', suit='♣'), PlayingCard(rank='5', suit='♣'), PlayingCard(rank='6', suit='♣'), PlayingCard(rank='7', suit='♣'), PlayingCard(rank='8', suit='♣'), PlayingCard(rank='9', suit='♣'), PlayingCard(rank='10', suit='♣'), PlayingCard(rank='J', suit='♣'), PlayingCard(rank='Q', suit='♣'), PlayingCard(rank='K', suit='♣'), PlayingCard(rank='A', suit='♣'), PlayingCard(rank='2', suit='♢'), PlayingCard(rank='3', suit='♢'), PlayingCard(rank='4', suit='♢'), PlayingCard(rank='5', suit='♢'), PlayingCard(rank='6', suit='♢'), PlayingCard(rank='7', suit='♢'), PlayingCard(rank='8', suit='♢'), PlayingCard(rank='9', suit='♢'), PlayingCard(rank='10', suit='♢'), PlayingCard(rank='J', suit='♢'), PlayingCard(rank='Q', suit='♢'), PlayingCard(rank='K', suit='♢'), PlayingCard(rank='A', suit='♢'), PlayingCard(rank='2', suit='♡'), PlayingCard(rank='3', suit='♡'), PlayingCard(rank='4', suit='♡'), PlayingCard(rank='5', suit='♡

Let's adjust the ```__repr__``` to clean this up.

In [17]:
@dataclass
class Deck:
    cards: List[PlayingCard] = field(default_factory=make_french_deck)

    def __repr__(self):
        cards = ', '.join(f'{c!s}' for c in self.cards)
        return f'{self.__class__.__name__}({cards})'

Deck()

Deck(♣2, ♣3, ♣4, ♣5, ♣6, ♣7, ♣8, ♣9, ♣10, ♣J, ♣Q, ♣K, ♣A, ♢2, ♢3, ♢4, ♢5, ♢6, ♢7, ♢8, ♢9, ♢10, ♢J, ♢Q, ♢K, ♢A, ♡2, ♡3, ♡4, ♡5, ♡6, ♡7, ♡8, ♡9, ♡10, ♡J, ♡Q, ♡K, ♡A, ♠2, ♠3, ♠4, ♠5, ♠6, ♠7, ♠8, ♠9, ♠10, ♠J, ♠Q, ♠K, ♠A)

Representation: repr: make a readable text for printing to console.

- ```repr(obj)```: for developers
- ```str(obj)```: for users

Your better of using str, because then you can still recreate the object using the repr.

## Comparing Cards
comparing objects of PlayingCards doesn't go well.

In [18]:
%%capture output
queen_of_hearts = PlayingCard('Q', '♡')
ace_of_spades = PlayingCard('A', '♠')
ace_of_spades > queen_of_hearts

TypeError: '>' not supported between instances of 'PlayingCard' and 'PlayingCard'

But if we set order=True, it goes. Kinda. because characters are compared, not the values we assign to them.

In [19]:
@dataclass(order=True)
class PlayingCard:
    rank: str
    suit: str

    def __str__(self):
        return f'{self.suit}{self.rank}'

queen_of_hearts = PlayingCard('Q', '♡')
ace_of_spades = PlayingCard('A', '♠')
ace_of_spades > queen_of_hearts

False

Still not that great, let's come up with a system to rank.

In [20]:
RANKS = '2 3 4 5 6 7 8 9 10 J Q K A'.split()
SUITS = '♣ ♢ ♡ ♠'.split()
card = PlayingCard('Q', '♡')
RANKS.index(card.rank) * len(SUITS) + SUITS.index(card.suit)

42

From this we can create a sort index. Problem is that this only can be calculated after initialisation of the object. Luckily, dataclasses have a sooluation for this. ```__post_init___``` can be executed after the ```___init__```. Let's see if it works.

In [21]:
from dataclasses import dataclass, field

RANKS = '2 3 4 5 6 7 8 9 10 J Q K A'.split()
SUITS = '♣ ♢ ♡ ♠'.split()

@dataclass(order=True)
class PlayingCard:
    sort_index: int = field(init=False, repr=False)
    rank: str
    suit: str

    def __post_init__(self):
        self.sort_index = (RANKS.index(self.rank) * len(SUITS)
                           + SUITS.index(self.suit))

    def __str__(self):
        return f'{self.suit}{self.rank}'

queen_of_hearts = PlayingCard('Q', '♡')
ace_of_spades = PlayingCard('A', '♠')
ace_of_spades > queen_of_hearts

True

Yes! Now we can sort the deck using the sort index values!

In [22]:
Deck(sorted(make_french_deck()))

Deck(♣2, ♢2, ♡2, ♠2, ♣3, ♢3, ♡3, ♠3, ♣4, ♢4, ♡4, ♠4, ♣5, ♢5, ♡5, ♠5, ♣6, ♢6, ♡6, ♠6, ♣7, ♢7, ♡7, ♠7, ♣8, ♢8, ♡8, ♠8, ♣9, ♢9, ♡9, ♠9, ♣10, ♢10, ♡10, ♠10, ♣J, ♢J, ♡J, ♠J, ♣Q, ♢Q, ♡Q, ♠Q, ♣K, ♢K, ♡K, ♠K, ♣A, ♢A, ♡A, ♠A)

There is also a nice way to get a raandom amount of objects:

In [23]:
Deck(sample(make_french_deck(), k=10))

Deck(♢A, ♢6, ♣A, ♡8, ♣7, ♢Q, ♡3, ♠10, ♡4, ♢7)

Some boolean arguments that can be altered in the dataclass:
- init: Add ```.__init__()``` method? (Default is True.)
- repr: Add ```.__repr__()``` method? (Default is True.)
- eq: Add ```.__eq__()``` method? (Default is True.)
- order: Add ordering methods? (Default is False.)
- unsafe_hash: Force the addition of a ```.__hash__()``` method? (Default is False.)
- frozen: If True, assigning to fields raise an exception. (Default is False.)

# Immutable Data Classes

Can be done by adding the parameter ```frozen=True``` in ```@dataclass```.

object can be created, but not adjusted.

In [24]:
%%capture output
@dataclass(frozen=True)
class Position:
    name: str
    lon: float = 0.0
    lat: float = 0.0

# Gives error
pos = Position('Oslo', 10.8, 59.9)
pos.name = 'Stockholm'

FrozenInstanceError: cannot assign to field 'name'

But lists don't do this.

In [25]:
@dataclass(frozen=True)
class ImmutableCard:
    rank: str
    suit: str

@dataclass(frozen=True)
class ImmutableDeck:
    cards: List[ImmutableCard]

queen_of_hearts = ImmutableCard('Q', '♡')
ace_of_spades = ImmutableCard('A', '♠')
deck = ImmutableDeck([queen_of_hearts, ace_of_spades])
deck

ImmutableDeck(cards=[ImmutableCard(rank='Q', suit='♡'), ImmutableCard(rank='A', suit='♠')])

In [26]:
deck.cards[0] = ImmutableCard('7', '♢')
deck

ImmutableDeck(cards=[ImmutableCard(rank='7', suit='♢'), ImmutableCard(rank='A', suit='♠')])

WARNING: Using a tuple doesn't make in unmutable, funny enough.

In [27]:
@dataclass(frozen=True)
class ImmutableCard:
    rank: str
    suit: str

@dataclass(frozen=True)
class ImmutableDeck:
    cards: Tuple[ImmutableCard]

queen_of_hearts = ImmutableCard('Q', '♡')
ace_of_spades = ImmutableCard('A', '♠')
deck = ImmutableDeck([queen_of_hearts, ace_of_spades])
deck

ImmutableDeck(cards=[ImmutableCard(rank='Q', suit='♡'), ImmutableCard(rank='A', suit='♠')])

In [28]:
deck.cards[0] = ImmutableCard('7', '♢')
deck

ImmutableDeck(cards=[ImmutableCard(rank='7', suit='♢'), ImmutableCard(rank='A', suit='♠')])

# Inheritance

In [29]:
from dataclasses import dataclass

@dataclass
class Position:
    name: str
    lon: float
    lat: float

@dataclass
class Capital(Position):
    country: str

Capital('Oslo', 10.8, 59.9, 'Norway')

Capital(name='Oslo', lon=10.8, lat=59.9, country='Norway')

Important detail: if the base class has a default argument, then the extending class also must have default arguments. The code below gives an error.

In [30]:
%%capture output
@dataclass
class Position:
    name: str
    lon: float = 0.0
    lat: float = 0.0

@dataclass
class Capital(Position):
    country: str  # Does NOT work

Capital('Madrid', country='Spain')

TypeError: non-default argument 'country' follows default argument

But this will work:

In [31]:
@dataclass
class Position:
    name: str
    lon: float = 0.0
    lat: float = 0.0

@dataclass
class Capital(Position):
    country: str = 'Unknown'
    lat: float = 40.0

Capital('Madrid', country='Spain')

Capital(name='Madrid', lon=0.0, lat=40.0, country='Spain')

# Optimizing Data Classes

Both dataclasses and regular classes have the options to define slots. This makes the class faster and smaller. Downside is that a (data)class using slots does not support default arguments.

In [32]:
@dataclass
class SimplePosition:
    name: str
    lon: float
    lat: float

@dataclass
class SlotPosition:
    __slots__ = ['name', 'lon', 'lat']
    name: str
    lon: float
    lat: float

In [33]:
slots = timeit('slot.name', setup="slot=SlotPosition('Oslo', 10.8, 59.9)", globals=globals())
simple = timeit('simple.name', setup="simple=SimplePosition('Oslo', 10.8, 59.9)", globals=globals())
print('{:20} {:7.4f} {:}'.format("slot takes:", slots, "seconds."))
print('{:20} {:7.4f} {:}'.format("simple takes:", simple, "seconds."))
print('{:20} {:7.4f} {:}'.format("speed improvement:", 100*(slots-simple)/simple, "%"))

slot takes:           0.0142 seconds.
simple takes:         0.0141 seconds.
speed improvement:    0.2370 %
