# Testing and comparing named tuples vs. data classes in Python

My brief comparison, only for informational purposes.

In [1]:
from collections import namedtuple
from dataclasses import dataclass
import sys
from typing import NamedTuple, Union

### Creating types

* `Z`: `collections.namedtuple`
* `Z_`: `typing.NamedTuple` the old way
* `Z__`: `typing.NamedTuple` the new way (`class`-like)
* `D`: `dataclass` with `__dict__`
* `D_`: `dataclass` with `__slots__`

In [2]:
Z = namedtuple('Z', ('x1', 'x2', 'x3', 'x4', 'x5', 'y'))
Z.__doc__ = \
    """
    Z type

    Attributes
    ----------
    x1 : int
        Description of x
    x2 : int
        Description of x
    x3 : int
        Description of x
    x4 : int
        Description of x
    x5 : int
        Description of x
    y : Union[int, str]
        Description of y

    """


Z_ = NamedTuple('Z_', x1=int, x2=int, x3=int, x4=int, x5=int, y=str)
Z_.__doc__ = \
    """
    Z_ type

    Attributes
    ----------
    x1 : int
        Description of x
    x2 : int
        Description of x
    x3 : int
        Description of x
    x4 : int
        Description of x
    x5 : int
        Description of x
    y : Union[int, str]
        Description of y

    """


class Z__(NamedTuple):
    """
    Z__ type

    Attributes
    ----------
    x1 : int
        Description of x
    x2 : int
        Description of x
    x3 : int
        Description of x
    x4 : int
        Description of x
    x5 : int
        Description of x
    y : Union[int, str]
        Description of y

    """
    x1: int
    x2: int
    x3: int
    x4: int
    x5: int
    y: Union[int, str]


@dataclass(frozen=True)
class D:
    """
    D type

    Attributes
    ----------
    x1 : int
        Description of x
    x2 : int
        Description of x
    x3 : int
        Description of x
    x4 : int
        Description of x
    x5 : int
        Description of x
    y : Union[int, str]
        Description of y

    """
    x1: int
    x2: int
    x3: int
    x4: int
    x5: int
    y: Union[int, str]


@dataclass(frozen=True)
class D_:
    """
    D_ type

    Attributes
    ----------
    x1 : int
        Description of x
    x2 : int
        Description of x
    x3 : int
        Description of x
    x4 : int
        Description of x
    x5 : int
        Description of x
    y : Union[int, str]
        Description of y

    """
    __slots__ = ('x1', 'x2', 'x3', 'x4', 'x5', 'y')
    x1: int
    x2: int
    x3: int
    x4: int
    x5: int
    y: Union[int, str]

### Creating instances

* `z1`: `collections.namedtuple`
* `z2`: `typing.NamedTuple` the old way
* `z3`: `typing.NamedTuple` the new way (`class`-like)
* `d1`: `dataclass` with `__dict__`
* `d2`: `dataclass` with `__slots__`

In [3]:
z1 = Z(1, 2, 3, 4, 5, 'xyz')
z2 = Z_(1, 2, 3, 4, 5, 'xyz')
z3 = Z__(1, 2, 3, 4, 5, 'xyz')
d1 = D(1, 2, 3, 4, 5, 'xyz')
d2 = D_(1, 2, 3, 4, 5, 'xyz')

### Testing behavior of the `==` operator

In [4]:
z1 == z2 == z3

True

In [5]:
z1 == d1

False

In [6]:
d1 == d2

False

In [7]:
d1 == D(1, 2, 3, 4, 5, 'xyz')

True

In [8]:
d2 == D_(1, 2, 3, 4, 5, 'xyz')

True

### Comparing members

In [9]:
print(
    sorted(set(dir(z1)) - set(dir(z2))),
    sorted(set(dir(z2)) - set(dir(z1))),
    sep='\n'
)

[]
['__annotations__']


In [10]:
print(
    sorted(set(dir(z1)) - set(dir(z3))),
    sorted(set(dir(z3)) - set(dir(z1))),
    sep='\n'
)

[]
['__annotations__', '__orig_bases__']


In [11]:
print(
    sorted(set(dir(z2)) - set(dir(z3))),
    sorted(set(dir(z3)) - set(dir(z2))),
    sep='\n'
)

[]
['__orig_bases__']


In [12]:
print(
    sorted(set(dir(z1)) - set(dir(d1))),
    sorted(set(dir(d1)) - set(dir(z1))),
    sep='\n'
)

['__add__', '__class_getitem__', '__contains__', '__getitem__', '__getnewargs__', '__iter__', '__len__', '__mul__', '__rmul__', '__slots__', '_asdict', '_field_defaults', '_fields', '_make', '_replace', 'count', 'index']
['__annotations__', '__dataclass_fields__', '__dataclass_params__', '__dict__', '__weakref__']


In [13]:
print(
    sorted(set(dir(Z)) - set(dir(Z_))),
    sorted(set(dir(Z_)) - set(dir(Z))),
    sep='\n'
)

[]
['__annotations__']


In [14]:
print(
    sorted(set(dir(Z)) - set(dir(Z__))),
    sorted(set(dir(Z__)) - set(dir(Z))),
    sep='\n'
)

[]
['__annotations__', '__orig_bases__']


In [15]:
print(
    sorted(set(dir(Z_)) - set(dir(Z__))),
    sorted(set(dir(Z__)) - set(dir(Z_))),
    sep='\n'
)

[]
['__orig_bases__']


In [16]:
print(
    sorted(set(dir(Z)) - set(dir(D))),
    sorted(set(dir(D)) - set(dir(Z))),
    sep='\n'
)

['__add__', '__class_getitem__', '__contains__', '__getitem__', '__getnewargs__', '__iter__', '__len__', '__mul__', '__rmul__', '__slots__', '_asdict', '_field_defaults', '_fields', '_make', '_replace', 'count', 'index', 'x1', 'x2', 'x3', 'x4', 'x5', 'y']
['__annotations__', '__dataclass_fields__', '__dataclass_params__', '__dict__', '__weakref__']


In [17]:
print(
    sorted(set(dir(D)) - set(dir(D_))),
    sorted(set(dir(D_)) - set(dir(D))),
    sep='\n'
)

['__dict__', '__weakref__']
['__slots__', 'x1', 'x2', 'x3', 'x4', 'x5', 'y']


In [18]:
z2.__annotations__, z3.__annotations__

({'x1': int, 'x2': int, 'x3': int, 'x4': int, 'x5': int, 'y': str},
 {'x1': int,
  'x2': int,
  'x3': int,
  'x4': int,
  'x5': int,
  'y': typing.Union[int, str]})

In [19]:
z3.__orig_bases__

(<function typing.NamedTuple(typename, fields=None, /, **kwargs)>,)

In [20]:
d1.__annotations__, d2.__annotations__

({'x1': int,
  'x2': int,
  'x3': int,
  'x4': int,
  'x5': int,
  'y': typing.Union[int, str]},
 {'x1': int,
  'x2': int,
  'x3': int,
  'x4': int,
  'x5': int,
  'y': typing.Union[int, str]})

In [21]:
z1._fields, z2._fields, z2._fields

(('x1', 'x2', 'x3', 'x4', 'x5', 'y'),
 ('x1', 'x2', 'x3', 'x4', 'x5', 'y'),
 ('x1', 'x2', 'x3', 'x4', 'x5', 'y'))

In [22]:
d1.__dataclass_fields__, d2.__dataclass_fields__

({'x1': Field(name='x1',type=<class 'int'>,default=<dataclasses._MISSING_TYPE object at 0x7f78656c91f0>,default_factory=<dataclasses._MISSING_TYPE object at 0x7f78656c91f0>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),_field_type=_FIELD),
  'x2': Field(name='x2',type=<class 'int'>,default=<dataclasses._MISSING_TYPE object at 0x7f78656c91f0>,default_factory=<dataclasses._MISSING_TYPE object at 0x7f78656c91f0>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),_field_type=_FIELD),
  'x3': Field(name='x3',type=<class 'int'>,default=<dataclasses._MISSING_TYPE object at 0x7f78656c91f0>,default_factory=<dataclasses._MISSING_TYPE object at 0x7f78656c91f0>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),_field_type=_FIELD),
  'x4': Field(name='x4',type=<class 'int'>,default=<dataclasses._MISSING_TYPE object at 0x7f78656c91f0>,default_factory=<dataclasses._MISSING_TYPE object at 0x7f78656c91f0>,init=True,repr=True,hash=None,compare=

In [23]:
d1.__dataclass_params__, d1.__dict__, d2.__dataclass_params__, d2.__slots__

(_DataclassParams(init=True,repr=True,eq=True,order=False,unsafe_hash=False,frozen=True),
 {'x1': 1, 'x2': 2, 'x3': 3, 'x4': 4, 'x5': 5, 'y': 'xyz'},
 _DataclassParams(init=True,repr=True,eq=True,order=False,unsafe_hash=False,frozen=True),
 ('x1', 'x2', 'x3', 'x4', 'x5', 'y'))

In [24]:
z1._field_defaults, z2._field_defaults, z2._field_defaults

({}, {}, {})

In [25]:
z1.count, z2.count, z2.count

(<function Z.count(value, /)>,
 <function Z_.count(value, /)>,
 <function Z_.count(value, /)>)

In [26]:
z1.index, z2.index, z2.index

(<function Z.index(value, start=0, stop=9223372036854775807, /)>,
 <function Z_.index(value, start=0, stop=9223372036854775807, /)>,
 <function Z_.index(value, start=0, stop=9223372036854775807, /)>)

### Checking performance: initialization

* `collections.namedtuple` slightly better than  `typing.NamedTuple`
* `typing.NamedTuple` much better than `dataclass` with `__slots__`
* `dataclass` with `__slots__` slightly better than `dataclass` with `__dict__`

In [27]:
%%timeit
z1 = Z(1, 2, 3, 4, 5, 'xyz')

316 ns ± 0.201 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [28]:
%%timeit
z2 = Z_(1, 2, 3, 4, 5, 'xyz')

327 ns ± 0.695 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [29]:
%%timeit
z3 = Z__(1, 2, 3, 4, 5, 'xyz')

324 ns ± 0.25 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [30]:
%%timeit
d1 = D(1, 2, 3, 4, 5, 'xyz')

1.1 µs ± 0.156 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [31]:
%%timeit
d2 = D_(1, 2, 3, 4, 5, 'xyz')

998 ns ± 0.188 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


### Checking performance: reading attribute value

(Depends on the problem size)

* Named tuples slightly better than data classes

In [32]:
%%timeit
x11, x12, x13, x14, x15, y1 = z1.x1, z1.x2, z1.x3, z1.x4, z1.x5, z1.y

228 ns ± 0.405 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [33]:
%%timeit
x21, x22, x23, x24, x25, y2 = z2.x1, z2.x2, z2.x3, z2.x4, z2.x5, z2.y

217 ns ± 0.379 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [34]:
%%timeit
x31, x32, x33, x34, x35, y3 = z3.x1, z3.x2, z3.x3, z3.x4, z3.x5, z3.y

214 ns ± 0.185 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [35]:
%%timeit
x_1, x_2, x_3, x_4, x_5, y_ = d1.x1, d1.x2, d1.x3, d1.x4, d1.x5, d1.y

245 ns ± 0.218 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [36]:
%%timeit
x__1, x__2, x__3, x__4, x__5, y__ = d2.x1, d2.x2, d2.x3, d2.x4, d2.x5, d2.y

235 ns ± 0.21 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


### Testing behavior as `set` elements and `dict` keys

* All of them can be used as `set` elements and `dict` keys! Great!

In [37]:
s = {z1, z2, z3, d1, d2}
s_ = frozenset(s)
d_ = {z1: 1, z2: 2, z3:3, d1: 4, d2: 5}

s, d_

({D(x1=1, x2=2, x3=3, x4=4, x5=5, y='xyz'),
  D_(x1=1, x2=2, x3=3, x4=4, x5=5, y='xyz'),
  Z(x1=1, x2=2, x3=3, x4=4, x5=5, y='xyz')},
 {Z(x1=1, x2=2, x3=3, x4=4, x5=5, y='xyz'): 3,
  D(x1=1, x2=2, x3=3, x4=4, x5=5, y='xyz'): 4,
  D_(x1=1, x2=2, x3=3, x4=4, x5=5, y='xyz'): 5})

In [38]:
args = tuple(range(1, 6))

In [39]:
Z(*args, 'xyz') in s, Z(*args, 'xyz') in s_, Z(*args, 'xyz') in d_

(True, True, True)

In [40]:
Z_(*args, 'xyz') in s, Z_(*args, 'xyz') in s_, Z_(*args, 'xyz') in d_

(True, True, True)

In [41]:
Z__(*args, 'xyz') in s, Z__(*args, 'xyz') in s_, Z__(*args, 'xyz') in d_

(True, True, True)

In [42]:
D(*args, 'xyz') in s, D(*args, 'xyz') in s_, D(*args, 'xyz') in d_

(True, True, True)

In [43]:
D_(*args, 'xyz') in s, D_(*args, 'xyz') in s_, D_(*args, 'xyz') in d_

(True, True, True)

### Comparing memory usage

(Depends on the problem size)

* `dataclass` with `__dict__` much better than `dataclass` with `__slots__`
* `dataclass` with `__slots__` slightly better than named tuples
* All named tuples equal

In [44]:
for item in z1, z2, z3, d1, d2:
    print(sys.getsizeof(item))

88
88
88
48
80


### Checking performance: membership testing for `set`

* `collections.namedtuple` better than  `typing.NamedTuple`
* `typing.NamedTuple` much better than dataclass with `__dict__`
* `dataclass` with `__dict__` better than `dataclass` with `__slots__`

In [45]:
%%timeit
z1 in s

86.4 ns ± 0.0944 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


In [46]:
%%timeit
z2 in s

110 ns ± 0.388 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


In [47]:
%%timeit
z3 in s

109 ns ± 0.111 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


In [48]:
%%timeit
d1 in s

508 ns ± 0.594 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [49]:
%%timeit
d2 in s

773 ns ± 0.215 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


### Checking performance: membership testing for `dict` keys

* `collections.namedtuple` better than  `typing.NamedTuple`
* `typing.NamedTuple` much better than dataclass with `__dict__`
* `dataclass` with `__dict__` better than `dataclass` with `__slots__`

In [50]:
%%timeit
z1 in d_

73.2 ns ± 0.0598 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


In [51]:
%%timeit
z2 in d_

96.5 ns ± 0.0551 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


In [52]:
%%timeit
z3 in d_

94.3 ns ± 0.0112 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


In [53]:
%%timeit
d1 in d_

494 ns ± 0.164 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [54]:
%%timeit
d2 in d_

760 ns ± 0.481 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


### Checking performance: values access for `dict` with indexing

* `collections.namedtuple` better than  `typing.NamedTuple`
* `typing.NamedTuple` much better than dataclass with `__dict__`
* `dataclass` with `__dict__` better than `dataclass` with `__slots__`

In [55]:
%%timeit
r = d_[z1]

74.2 ns ± 0.113 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


In [56]:
%%timeit
r = d_[z2]

101 ns ± 0.673 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


In [57]:
%%timeit
r = d_[z3]

100 ns ± 0.0973 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


In [58]:
%%timeit
r = d_[d1]

493 ns ± 0.642 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [59]:
%%timeit
r = d_[d2]

760 ns ± 0.267 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
