Instance data storage review

In [5]:
class Employee:
    """Plain employee record whose attributes live in the per-instance ``__dict__``."""

    def __init__(self, name, surname, age, status, salary):
        # Assigned left to right, so ``__dict__`` keeps the declaration order.
        self.name, self.surname = name, surname
        self.age, self.status = age, status
        self.salary = salary

In [6]:
e1 = Employee(name='Andrew', surname='Doe', age=24, status='FT', salary=46000)

In [7]:
e1

<__main__.Employee at 0x130625eaed0>

In [8]:
e1.name, e1.surname  # a cell echoes only its last expression, so show both values as one tuple

('Andrew', 'Doe')

In [9]:
e1.__dict__

{'name': 'Andrew',
 'surname': 'Doe',
 'age': 24,
 'status': 'FT',
 'salary': 46000}

In [10]:
type(e1.__dict__) #instance namespace

dict

In [11]:
type(e1.__class__.__dict__) # class namespace

mappingproxy

In [12]:
e1.__dict__['pension'] ='DB'

In [13]:
e1.pension

'DB'

In [14]:
# Flexibility comes at a cost:
# a per-instance dict uses more memory and makes attribute access slower,
# so __slots__ can be used when that overhead matters.

Slots

In [15]:
class Employee:
    """Employee record with a fixed attribute set declared through ``__slots__``."""

    __slots__ = (
        'name',
        'surname',
        'age',
        'status',
        'salary',
    )

    def __init__(self, name, surname, age, status, salary):
        self.name, self.surname, self.age = name, surname, age
        self.status, self.salary = status, salary

In [16]:
e1 = Employee(name='Andrew', surname='Doe', age=24, status='FT', salary=46000)

In [17]:
e1

<__main__.Employee at 0x130625f5760>

In [18]:
e1.name

'Andrew'

In [19]:
e1.__dict__ # no more dictionary

AttributeError: 'Employee' object has no attribute '__dict__'

In [20]:
e1.__slots__ # instances now keep only these names in fixed-size storage instead of a dict, so they are as small as they need to be; access is also fast because each name maps to a fixed position

('name', 'surname', 'age', 'status', 'salary')

In [None]:
# So basically we go from a hash map to a fixed-length array implemented in C.

In [None]:
# We no longer have the option to add attributes on the fly; only the names already declared in __slots__ can be set.

Class Residents

In [24]:
class Employee:
    # Slotted employee record. Left without a class docstring so that the
    # class __dict__ printed afterwards still reports '__doc__': None.
    __slots__ = ('name', 'surname', 'age', 'status', 'salary')

    def __init__(self, name, surname, age, status, salary):
        self.name, self.surname, self.age = name, surname, age
        self.status, self.salary = status, salary

    @property
    def high_salary(self):
        """Return True when the salary is strictly above 40000."""
        threshold = 40000
        return self.salary > threshold

In [25]:
e1 = Employee(name='Andrew', surname='Doe', age=24, status='FT', salary=46000)

In [26]:
Employee.__dict__

mappingproxy({'__module__': '__main__',
              '__slots__': ('name', 'surname', 'age', 'status', 'salary'),
              '__init__': <function __main__.Employee.__init__(self, name, surname, age, status, salary)>,
              'high_salary': <property at 0x130625f7f10>,
              'age': <member 'age' of 'Employee' objects>,
              'name': <member 'name' of 'Employee' objects>,
              'salary': <member 'salary' of 'Employee' objects>,
              'status': <member 'status' of 'Employee' objects>,
              'surname': <member 'surname' of 'Employee' objects>,
              '__doc__': None})

In [None]:
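# Both properties and slotted attributes are examples of descriptors: special Python objects that implement at least one of the three descriptor methods:
# - __get__(), - __set__(), - __delete__()
# When we declare a slotted name, Python creates a member descriptor for it on the class, and the default attribute lookup (__getattribute__) routes access to that name through the descriptor.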

Memory advantage

In [None]:
# pympler - used here to measure the memory footprint of objects

In [27]:
# %pip installs into the environment of the running kernel; the version is pinned for reproducibility.
%pip install -q pympler==1.1

Note: you may need to restart the kernel to use updated packages.


In [28]:
class SlottedEmployee:
    # Slotted variant used for the memory comparison: same fields as the
    # dict-backed Employee, but stored in slots.
    __slots__ = ('name', 'surname', 'age', 'status', 'salary')

    def __init__(self, name, surname, age, status, salary):
        self.name, self.surname, self.age = name, surname, age
        self.status, self.salary = status, salary
class Employee:
    # Dict-backed variant used for the memory comparison; attributes are
    # assigned in declaration order.
    def __init__(self, name, surname, age, status, salary):
        self.name, self.surname = name, surname
        self.age, self.status = age, status
        self.salary = salary



In [29]:
# Identical field values for both objects, so only the storage strategy differs.
e2 = Employee(name='Andrew', surname='Doe', age=24, status='FT', salary=46000)
e1 = SlottedEmployee(name='Andrew', surname='Doe', age=24, status='FT', salary=46000)

In [32]:
# import sys
# sys.path.append('/path/to/pympler')
from pympler.asizeof import asizeof

In [33]:
asizeof(e1)

304

In [34]:
asizeof(e2)

864

In [35]:
slotted_bytes, dict_bytes = 304, 864  # asizeof results observed in the two cells above
# The saving is measured relative to the dict-backed size (864).
reduction = round((dict_bytes - slotted_bytes) / dict_bytes, 3)
reduction  # ~65% reduction

0.648

In [36]:
from sys import getsizeof

In [37]:
getsizeof(e1)

72

In [38]:
getsizeof(e2) # getsizeof counts only the object's own size; it does not follow references to other objects (e.g. the instance __dict__)

56

Inheriting slots

In [39]:
class Employee:
    """Slotted base class used to show how ``__slots__`` behaves under inheritance."""

    __slots__ = (
        'name',
        'surname',
        'age',
        'status',
        'salary',
    )

    def __init__(self, name, surname, age, status, salary):
        self.name, self.surname, self.age = name, surname, age
        self.status, self.salary = status, salary


In [40]:
class Developer(Employee):
    """Employee subclass that declares no ``__slots__`` of its own."""

In [41]:
d = Developer(name='Beverly', surname='Simons', age=24, status='FT', salary=79000)

In [42]:
d.__dict__ # a subclass that declares no __slots__ of its own gets a __dict__ again, so new attributes can be added to its instances

{}

In [43]:
d.favorite_lang='python'

In [44]:
d.__dict__

{'favorite_lang': 'python'}

In [45]:
class BusinesAnalyst(Employee):
    """Slotted subclass of the slotted Employee; adds a single ``experience`` slot."""

    # Written as a one-element tuple rather than a bare string, matching the
    # other __slots__ declarations. Because the parent is slotted as well,
    # instances have no __dict__ and cannot take ad-hoc attributes.
    __slots__ = ('experience',)

In [46]:
ba = BusinesAnalyst(name='Vlad', surname='Rob', age=33, status='PT', salary=67888)

In [47]:
ba.__dict__

AttributeError: 'BusinesAnalyst' object has no attribute '__dict__'

In [48]:
class Employee:
    """Non-slotted base class: instances keep their per-instance ``__dict__``."""

    # Deliberately no __slots__ here, so subclasses inherit the instance __dict__.
    def __init__(self, name, surname, age, status, salary):
        self.name, self.surname = name, surname
        self.age, self.status = age, status
        self.salary = salary


In [49]:
class BusinesAnalyst(Employee):
    # Kept without a docstring so that the class __dict__ printed afterwards
    # still reports '__doc__': None.
    # The parent here is not slotted, so instances still carry a __dict__
    # even though every field is declared as a slot on this class.
    __slots__ = ('name', 'surname', 'age', 'status', 'salary', 'experience')

In [50]:
ba = BusinesAnalyst(name='Vlad', surname='Rob', age=33, status='PT', salary=67888)

In [51]:
ba.__dict__ # when only the subclass is slotted and the parent is not, instances keep the __dict__ inherited from the parent and stay extensible

{}

In [52]:
BusinesAnalyst.__dict__

mappingproxy({'__module__': '__main__',
              '__slots__': ('name',
               'surname',
               'age',
               'status',
               'salary',
               'experience'),
              'age': <member 'age' of 'BusinesAnalyst' objects>,
              'experience': <member 'experience' of 'BusinesAnalyst' objects>,
              'name': <member 'name' of 'BusinesAnalyst' objects>,
              'salary': <member 'salary' of 'BusinesAnalyst' objects>,
              'status': <member 'status' of 'BusinesAnalyst' objects>,
              'surname': <member 'surname' of 'BusinesAnalyst' objects>,
              '__doc__': None})

Something to avoid

In [56]:
class Employee:
    """Slotted employee record that re-enables the per-instance ``__dict__``."""

    # Listing '__dict__' in __slots__ brings the instance dict back. Avoid
    # doing this: it is careless and defeats the purpose of using slots.
    __slots__ = ('name', 'surname', 'age', 'status', 'salary', '__dict__')

    def __init__(self, name, surname, age, status, salary):
        self.name, self.surname, self.age = name, surname, age
        self.status, self.salary = status, salary


In [57]:
e1 = Employee(name='Andrew', surname='Doe', age=24, status='FT', salary=46000)

In [58]:
hasattr(e1,'__dict__')

True

In [59]:
e1.__dict__

{}

In [60]:
e1.nick='gay'

In [61]:
e1.__dict__

{'nick': 'gay'}

Should we always use slots? (no)

In [None]:
# We lose __dict__, which is a big deal, so slots should be used only for performance and memory optimization. They also make inheritance trickier.