# 🚀 Level up your Python skills with dataclass 🍫

2023-02

# Bonjour à tous 👋

In [69]:
#J'en ai marre

class Item:
    """Hand-written data holder: the boilerplate that ``@dataclass`` replaces.

    Attributes:
        name: Label of the item.
        price: Unit price.
        quantity: Number of units; defaults to 0.
    """

    def __init__(self, name: str, price: float, quantity: int = 0):
        self.name = name
        self.price = price
        self.quantity = quantity

    def __repr__(self):
        """Return a constructor-like representation listing every attribute."""
        return (
            f"{self.__class__.__qualname__}"
            f"(name={self.name!r}, price={self.price!r}, quantity={self.quantity!r})"
        )

    def __eq__(self, other):
        """Compare all three attributes; defer to Python for foreign types."""
        # Returning NotImplemented (instead of touching other.name) avoids an
        # AttributeError when an Item is compared with an unrelated object.
        if other.__class__ is not self.__class__:
            return NotImplemented
        return (self.name, self.price, self.quantity) == (
            other.name,
            other.price,
            other.quantity,
        )


# 📅 Sommaire

1. Good old-fashioned way to make a class
1. Dataclass - Level 1
1. Dataclass - Level 2
1. Dataclass - Level 3
1. Conclusion


# 👵 Good old-fashioned way

In [70]:
# Par défaut
class Item:
    """Bare-bones class: every constructor argument is stored as an attribute."""

    def __init__(self, name, price, quantity):
        # Store the three constructor arguments under the same names.
        self.name, self.price, self.quantity = name, price, quantity

In [71]:
# Avec des type hints et des valeurs par défaut
class Item:
    """Same plain class, now with type hints and a default ``quantity`` of 0."""

    def __init__(self, name: str, price: float, quantity: int = 0):
        # Store the three constructor arguments under the same names.
        self.name, self.price, self.quantity = name, price, quantity

In [72]:
i = Item("compu", 10)
i
# The default repr is not pretty

<__main__.Item at 0x7f8399ac03d0>

In [73]:
 i = Item("compu", 10)
i2 = Item("compu2", 20)
i3 = Item("compu", 10)


In [74]:
i > i2
# No ordering: a plain class does not support comparison operators

TypeError: '>' not supported between instances of 'Item' and 'Item'

In [75]:
i == i3
# No value equality: two items built from the same values compare unequal

False

# 😺 Level 1

# Dataclasses

* Data classes were originally described in PEP 557.

* Available since Python 3.7 (June 2018)

In [77]:
from dataclasses import dataclass

@dataclass
class Item:
    """Item declared as a dataclass: only the annotated fields are written by hand."""
    name: str
    price:float
    # Fields with a default value must come after the ones without.
    quantity: int = 0

In [78]:
i = Item("compu", 10)
i


Item(name='compu', price=10, quantity=0)

In [80]:
i = Item("compu", 10)
i2 = Item("compu2", 20)
i3 = Item("compu", 10)


In [81]:
i == i2

False

In [82]:
i == i3

True

In [16]:
# Under the hood
class Item:
    """Hand-written equivalent of what ``@dataclass`` generates for ``Item``.

    Attributes:
        name: Label of the item.
        price: Unit price.
        quantity: Number of units; defaults to 0.
    """

    def __init__(self, name: str, price: float, quantity: int = 0):
        self.name = name
        self.price = price
        self.quantity = quantity

    def __repr__(self):
        """Return a constructor-like representation listing every field."""
        return (
            self.__class__.__qualname__
            + f"(name={self.name!r}, price={self.price!r}, quantity={self.quantity!r})"
        )

    def __eq__(self, other):
        """Compare field by field; return ``NotImplemented`` for other classes."""
        if other.__class__ is self.__class__:  # Check for class equality
            return (
                self.name,
                self.price,
                self.quantity,
            ) == (
                other.name,
                other.price,
                # Must read the other instance's quantity; comparing
                # self.quantity with itself made quantity irrelevant.
                other.quantity,
            )
        return NotImplemented


# 🐈 Level 2

## Order

In [85]:
# Order
from dataclasses import dataclass

# order=True also generates __lt__, __le__, __gt__ and __ge__, which compare
# instances as tuples of their fields, in declaration order.
@dataclass(order=True)
class Item:
    """Item that supports ordering comparisons and sorting."""
    name: str
    price:float
    quantity: int = 0

In [86]:
i = Item("compu", 10)
i2 = Item("compu2", 20)
i3 = Item("compu", 10)
i4 = Item("zaperlipopette", 5)
i5 = Item("abracadbra", 10)
i6 = Item("abracadbra", 20)


In [87]:
i > i2

False

In [88]:
i > i5

True

In [89]:
items = [i, i2, i3, i4, i5, i6]
sorted(items)
# Sort by attribute order

[Item(name='abracadbra', price=10, quantity=0),
 Item(name='abracadbra', price=20, quantity=0),
 Item(name='compu', price=10, quantity=0),
 Item(name='compu', price=10, quantity=0),
 Item(name='compu2', price=20, quantity=0),
 Item(name='zaperlipopette', price=5, quantity=0)]

In [23]:
def __lt__(self, other):
    """Illustrate the ``<`` method that ``@dataclass(order=True)`` generates.

    Both instances are compared as ``(name, price, quantity)`` tuples.

    Returns:
        The tuple comparison result, or ``NotImplemented`` when ``other`` is
        not of exactly the same class as ``self``.
    """
    if other.__class__ is self.__class__:
        return (
            self.name,
            self.price,
            self.quantity,
        ) < (
            other.name,
            other.price,
            # Read quantity from the other instance, not from self.
            other.quantity,
        )
    return NotImplemented

def __le__(self, other):
    """Illustrate the ``<=`` method that ``@dataclass(order=True)`` generates.

    Both instances are compared as ``(name, price, quantity)`` tuples.

    Returns:
        The tuple comparison result, or ``NotImplemented`` when ``other`` is
        not of exactly the same class as ``self``.
    """
    if other.__class__ is self.__class__:
        return (
            self.name,
            self.price,
            self.quantity,
        ) <= (
            other.name,
            other.price,
            # Read quantity from the other instance, not from self.
            other.quantity,
        )
    return NotImplemented

In [None]:

def __gt__(self, other):
    """Illustrate the ``>`` method that ``@dataclass(order=True)`` generates.

    Both instances are compared as ``(name, price, quantity)`` tuples.

    Returns:
        The tuple comparison result, or ``NotImplemented`` when ``other`` is
        not of exactly the same class as ``self``.
    """
    if other.__class__ is self.__class__:
        return (
            self.name,
            self.price,
            self.quantity,
        ) > (
            other.name,
            other.price,
            # Read quantity from the other instance, not from self.
            other.quantity,
        )
    return NotImplemented

def __ge__(self, other):
    """Illustrate the ``>=`` method that ``@dataclass(order=True)`` generates.

    Both instances are compared as ``(name, price, quantity)`` tuples.

    Returns:
        The tuple comparison result, or ``NotImplemented`` when ``other`` is
        not of exactly the same class as ``self``.
    """
    if other.__class__ is self.__class__:
        return (
            self.name,
            self.price,
            self.quantity,
        ) >= (
            other.name,
            other.price,
            # Read quantity from the other instance, not from self.
            other.quantity,
        )
    return NotImplemented

In [92]:
# Post-init
from dataclasses import dataclass

@dataclass(order=True)
class Item:
    """Item ordered by price through a leading ``sort_index`` field."""
    # Declared first so the generated comparisons look at it before the other fields.
    sort_index:int
    name: str
    price:float
    quantity: int = 0

    def __post_init__(self):
        # Runs right after the generated __init__: whatever value was passed
        # for sort_index is overwritten with the price.
        self.sort_index = self.price

In [94]:
i = Item(0, "compu", 10)
i2 = Item(0, "compu2", 20)
i3 = Item(0, "compu", 10)
i4 = Item(0, "zaperlipopette", 5)
i5 = Item(0, "abracadbra", 10)
i6 = Item(0, "abracadbra", 20)

In [95]:
i

Item(sort_index=10, name='compu', price=10, quantity=0)

In [96]:
items = [i, i2, i3, i4, i5, i6]
sorted(items)

[Item(sort_index=5, name='zaperlipopette', price=5, quantity=0),
 Item(sort_index=10, name='abracadbra', price=10, quantity=0),
 Item(sort_index=10, name='compu', price=10, quantity=0),
 Item(sort_index=10, name='compu', price=10, quantity=0),
 Item(sort_index=20, name='abracadbra', price=20, quantity=0),
 Item(sort_index=20, name='compu2', price=20, quantity=0)]

### field

In [97]:
# Not convenient: let's exclude sort_index from __init__
from dataclasses import dataclass, field

@dataclass(order=True)
class Item:
    """Item ordered by price; ``sort_index`` is no longer a constructor argument."""
    # init=False removes sort_index from the generated __init__ signature;
    # it is filled in by __post_init__ instead.
    sort_index:int = field(init=False)
    name: str
    price:float
    quantity: int = 0

    def __post_init__(self):
        # Runs right after the generated __init__: order items by price first.
        self.sort_index = self.price

In [100]:
i = Item("compu", 10)
i2 = Item("compu2", 20)
i3 = Item("compu", 10)
i4 = Item("zaperlipopette", 5)
i5 = Item("abracadbra", 10)
i6 = Item("abracadbra", 20)

In [31]:
i

Item(sort_index=10, name='compu', price=10, quantity=0)

In [101]:
items = [i, i2, i3, i4, i5, i6]
sorted(items)

[Item(sort_index=5, name='zaperlipopette', price=5, quantity=0),
 Item(sort_index=10, name='abracadbra', price=10, quantity=0),
 Item(sort_index=10, name='compu', price=10, quantity=0),
 Item(sort_index=10, name='compu', price=10, quantity=0),
 Item(sort_index=20, name='abracadbra', price=20, quantity=0),
 Item(sort_index=20, name='compu2', price=20, quantity=0)]

In [102]:
# This index can also be excluded from the repr
from dataclasses import dataclass, field

@dataclass(order=True)
class Item:
    """Item ordered by price through a ``sort_index`` hidden from init and repr."""
    # init=False: not a constructor argument; repr=False: omitted from repr().
    sort_index:int = field(init=False, repr=False)
    name: str
    price:float
    quantity: int = 0

    def __post_init__(self):
        # Runs right after the generated __init__: order items by price first.
        self.sort_index = self.price

In [103]:
i = Item("compu", 10)
i2 = Item("compu2", 20)
i3 = Item("compu", 10)
i4 = Item("zaperlipopette", 5)
i5 = Item("abracadbra", 10)
i6 = Item("abracadbra", 20)

In [104]:
i

Item(name='compu', price=10, quantity=0)

In [105]:
items = [i, i2, i3, i4, i5, i6]
sorted(items)

[Item(name='zaperlipopette', price=5, quantity=0),
 Item(name='abracadbra', price=10, quantity=0),
 Item(name='compu', price=10, quantity=0),
 Item(name='compu', price=10, quantity=0),
 Item(name='abracadbra', price=20, quantity=0),
 Item(name='compu2', price=20, quantity=0)]


## Frozen

In [106]:
from dataclasses import dataclass, field

# frozen=True makes ordinary attribute assignment and deletion raise
# FrozenInstanceError.
@dataclass(order=True, frozen=True)
class Item:
    """Orderable item whose fields cannot be reassigned or deleted."""
    name: str
    price:float
    quantity: int = 0


In [107]:
i = Item("compu", 10)
i

Item(name='compu', price=10, quantity=0)

In [108]:
i.name = "new name"

FrozenInstanceError: cannot assign to field 'name'

In [109]:
# `del` is a statement, not a function: no parentheses needed.
del i.name

FrozenInstanceError: cannot delete field 'name'


### set_attr

In [110]:
# Mutation is still possible, but it takes more effort:
# calling object.__setattr__ directly bypasses the frozen check.
object.__setattr__(i, "name", "new_name")
i

Item(name='new_name', price=10, quantity=0)

## field

In [112]:
# compare
@dataclass(order=True)
class Item:
    """Item compared and sorted without looking at its price."""
    name: str
    # compare=False leaves price out of the generated comparison methods.
    price:float = field(compare=False)
    # repr=False hides quantity from repr(); it still defaults to 0.
    quantity: int = field(default=0, repr=False)

In [113]:
i = Item("compu", 300)
i2 = Item("compu2", 20)
i3 = Item("compu", 10)
i4 = Item("zaperlipopette", 5)
i5 = Item("abracadbra", 10)
i6 = Item("abracadbra", 20)

items = [i, i2, i3, i4, i5, i6]
sorted(items)

[Item(name='abracadbra', price=10),
 Item(name='abracadbra', price=20),
 Item(name='compu', price=300),
 Item(name='compu', price=10),
 Item(name='compu2', price=20),
 Item(name='zaperlipopette', price=5)]

### default_factory

In [116]:
# default_factory
# Intentionally invalid: demonstrates that a mutable default is rejected.
@dataclass(order=True)
class Item:
    name: str
    price:float = field(compare=False)
    quantity: int = field(default=0, repr=False)
    # A bare list default makes @dataclass raise ValueError at class creation.
    ingredients: list[str] = []

ValueError: mutable default <class 'list'> for field ingredients is not allowed: use default_factory

In [117]:
# default_factory
@dataclass(order=True)
class Item:
    """Item with a list field that is created fresh for each instance."""
    name: str
    price:float = field(compare=False)
    quantity: int = field(default=0, repr=False)
    # default_factory is called once per instance, so lists are never shared.
    ingredients: list[str] = field(default_factory=list)

In [118]:
i = Item("compu", 300)
i2 = Item("compu2", 20)

i.ingredients = ["salut", "u"]
i

Item(name='compu', price=300, ingredients=['salut', 'u'])

### metadata

In [119]:
# metadata
@dataclass(order=True)
class Item:
    """Item whose price field carries extra metadata."""
    name: str
    # metadata is a free-form mapping attached to the field; it can be read
    # back from the Field objects returned by dataclasses.fields().
    price:float = field(compare=False, metadata={"units": "euro"})
    quantity: int = field(default=0, repr=False)

In [120]:
from dataclasses import fields


i = Item("compu", 300)
fields(i)[1]

Field(name='price',type=<class 'float'>,default=<dataclasses._MISSING_TYPE object at 0x7f8399090f70>,default_factory=<dataclasses._MISSING_TYPE object at 0x7f8399090f70>,init=True,repr=True,hash=None,compare=False,metadata=mappingproxy({'units': 'euro'}),_field_type=_FIELD)

## Hash

If the object has a hash value then it can be used as a key for a dictionary or as an element in a set.

In [121]:
hash(i)

TypeError: unhashable type: 'Item'

In [122]:
# unsafe_hash=True forces a __hash__ to be generated even though the
# instances stay mutable.
@dataclass(order=True, unsafe_hash=True)
class Item:
    """Hashable item; the hash is built from the fields used for comparison."""
    name: str
    # With hash left unset, a field follows its compare flag: price is not hashed.
    price:float = field(compare=False)
    quantity: int = field(default=0, repr=False)

In [123]:
i = Item("compu", 300)
hash(i)

-6779222744390553206

In [124]:
i2 = Item("compu", 500, 10)
hash(i2)

-7503103874712779167

In [125]:
@dataclass(order=True, unsafe_hash=True)
class Item:
    """Hashable item whose hash depends on ``name`` only."""
    name: str = field(hash=True)
    # hash=False excludes these fields from the generated __hash__.
    price:float = field(compare=False, hash=False)
    quantity: int = field(default=0, repr=False, hash=False)

In [127]:
i = Item("compu", 300)
hash(i)

-3293060263487966230

In [128]:
i2 = Item("compu", 500, 10)
hash(i2)

-3293060263487966230

# 🐅 Level 3

## Type

In [130]:
from typing import ClassVar
# ClassVar
@dataclass()
class Item:
    """Item with a class-level attribute next to its instance fields."""
    name: str
    price:float
    quantity: int = 0
    # ClassVar marks brand as a class attribute: it is not an __init__
    # parameter and its value is shared by every instance.
    brand: ClassVar[int] = 19

In [131]:
i = Item("compu", 300)
print(i.brand)
i.__dataclass_fields__

19


{'name': Field(name='name',type=<class 'str'>,default=<dataclasses._MISSING_TYPE object at 0x7f8399090f70>,default_factory=<dataclasses._MISSING_TYPE object at 0x7f8399090f70>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),_field_type=_FIELD),
 'price': Field(name='price',type=<class 'float'>,default=<dataclasses._MISSING_TYPE object at 0x7f8399090f70>,default_factory=<dataclasses._MISSING_TYPE object at 0x7f8399090f70>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),_field_type=_FIELD),
 'quantity': Field(name='quantity',type=<class 'int'>,default=0,default_factory=<dataclasses._MISSING_TYPE object at 0x7f8399090f70>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),_field_type=_FIELD),
 'brand': Field(name='brand',type=typing.ClassVar[int],default=19,default_factory=<dataclasses._MISSING_TYPE object at 0x7f8399090f70>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),_field_type=_FIELD_CLASSVAR)}

In [132]:
Item.brand = 35
Item.brand

35

In [133]:
i.brand

35

## Slots



Optimizing with slots

* Slots can be used to make classes faster and use less memory. 
* Before Python 3.10, data classes had no explicit syntax for working with slots (since 3.10 you can write `@dataclass(slots=True)`),
* but the normal way of creating slots works for data classes as well. (They really are just regular classes!)

More info: https://stackoverflow.com/questions/472000/usage-of-slots

In [139]:
from dataclasses import dataclass

@dataclass
class SimplePosition:
    """Named position without ``__slots__``; the baseline for the timing below."""
    name: str
    lon: float
    lat: float

@dataclass
class SlotPosition:
    """Same fields as ``SimplePosition``, declared with explicit ``__slots__``."""
    # The slot names must match the annotated fields below.
    __slots__ = ['name', 'lon', 'lat']
    name: str
    lon: float
    lat: float

# The benefit of adding such restrictions is that certain optimizations may be done.
# For instance, slots classes take up less memory (this can be measured with Pympler)
# and, as timed below, attribute access is faster.

In [147]:
from timeit import timeit
timeit('slot.name', setup="slot=SlotPosition('Oslo', 10.8, 59.9)", globals=globals())
# 0.026604000013321638


0.04368233400009558

In [148]:
timeit('simple.name', setup="simple=SimplePosition('Oslo', 10.8, 59.9)", globals=globals())
# 0.04622900002868846

# About 35% faster in the reference timings quoted in the comments (about 11% in this run)

0.049237374999847816

# 🧐 But...

## Data Class as a Code Smell

Be aware that it may signal a problem in your design.

In *Refactoring: Improving the Design of Existing Code*, 2nd ed. (Addison-Wesley), Martin Fowler and Kent Beck present a catalog of “code smells”—patterns in code that may indicate the need for refactoring. The entry titled “Data Class” starts like this:

> These are classes that have fields, getting and setting methods for fields, and nothing else. Such classes are dumb data holders and are often being manipulated in far too much detail by other classes.

On Fowler’s personal website, there’s an illuminating post titled “Code Smell” (https://martinfowler.com/bliki/CodeSmell.html)


# 🧐 But...

## Dataclasses are good to hold data
* as scaffolding
* as intermediate representation

# 😎 Conclusion

* Data classes were introduced in Python 3.7.
* You do not have to write boilerplate code to get proper initialization, representation, and comparisons for your objects.



In [149]:
from typing import ClassVar, Optional

@dataclass(order=True, frozen=True, unsafe_hash=True)
class Item:
    """Recap example combining the options shown in this deck.

    Instances are orderable, frozen and hashable. ``price`` and
    ``ingredients`` are ignored by comparisons; ``price`` and ``quantity``
    are excluded from the hash; ``quantity`` is hidden from the repr.
    """
    name: str
    price: float = field(compare=False, hash=False)
    # Optional[int] instead of `int | None`: the union operator on types
    # only exists from Python 3.10 and raised TypeError when this cell ran.
    sku: Optional[int] = None
    quantity: int = field(default=0, repr=False, hash=False)
    # default_factory gives each instance its own list.
    ingredients: list[str] = field(compare=False, default_factory=list)

    # Class-level attribute: not an __init__ parameter, shared by all instances.
    brand: ClassVar[int] = 19

# 📚 Sources

* Raymond Hettinger - Dataclasses: The code generator to end all code generators - PyCon 2018
  * https://www.youtube.com/watch?v=T-TwcmT6Rcw
* ArjanCodes - If you're not using Python DATA CLASSES yet, you should 🚀
  * https://www.youtube.com/watch?v=vRVVyl9uaZc 
* mCoding - Python dataclasses will save you HOURS, also featuring attrs
  * https://www.youtube.com/watch?v=vBH6GRJ1REM 
* John Watson Rooney - How I Organize Data In Python with Dataclasses
  * https://www.youtube.com/watch?v=5mpLJxKfnXQ 
* https://docs.python.org/3/library/dataclasses.html
* https://www.invivoo.com/dataclasses-python/



# README

 jupyter nbextension enable rise --py --sys-prefix