In [2]:
from datetime import datetime, timezone
from random import choice, seed

## Descriptors:
Suppose we want a Point2D class whose coordinates must always be integers
- plain attributes x and y cannot guarantee this (user can for example set x to 10.3)
- instead we can use a property with getter and setter methods
    * we can do this with regular @property, getter, setter but there will be a lot of code repetition when we have more than 1 variable that we want to set.
- **In this case it is better to use descriptors**
    

### 4 main methods of the descriptor protocol:
- \__get__ (used to get an attribute value)
- \__set__ (used to set an attribute value)
- \__delete__ (used to delete an attribute)
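- \__set_name__ (called once when the owner class is created, with the owner class and the attribute name the descriptor was assigned to)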


### Two categories of descriptors
- Those that implement \__get__ only --> non-data descriptors
- Those that implement \__set\__  and/or \__delete__ --> data descriptors

### non-data descriptors
These are non-data descriptors because they only implement the \__get__ method

In [3]:
class TimeUTC:
    """Non-data descriptor that produces the current UTC time on every read."""

    def __get__(self, instance, owner_class):
        """Return the current UTC time as an ISO-8601 string without an offset.

        ``datetime.utcnow()`` is deprecated since Python 3.12, so the time is
        taken from an aware ``datetime.now(timezone.utc)``. The tzinfo is then
        dropped so the resulting string keeps the offset-free format.

        Neither ``instance`` nor ``owner_class`` is consulted.
        """
        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

# Logger exposes the TimeUTC descriptor as a class attribute.
class Logger:
    current_time = TimeUTC()

# Reading the attribute through an instance is routed to TimeUTC.__get__.
l = Logger()
l.current_time

'2021-06-19T18:56:49.040643'

In [4]:
class Choice:
    """Non-data descriptor that returns a random pick from a fixed set of options."""

    def __init__(self, *choices):
        # *choices arrives as a tuple; it is kept as-is for random.choice
        self.choices = choices

    def __get__(self, instance, owner_class):
        """Draw one of the stored options; instance and owner_class are not used."""
        options = self.choices
        return choice(options)

class Deck:
    # NOTE(review): 'Diamnond' looks like a typo for 'Diamond'; it is left unchanged
    # because the recorded output of this cell contains the same spelling.
    suit = Choice('Spade', 'Heart', 'Diamnond', 'Club')
    # *'23456789JQKA' unpacks into one-character ranks; '10' is passed separately
    card = Choice(*'23456789JQKA', '10')

# seed the random module so the sequence of draws is repeatable
seed(0)
d = Deck()
for _ in range(10):
    # every attribute read calls Choice.__get__, so each read is a new draw
    print(d.card, d.suit)

8 Club
2 Diamnond
J Club
8 Diamnond
9 Diamnond
Q Heart
J Heart
6 Heart
10 Spade
Q Diamnond


The advantage of using a separate class for choices is that we can re-use the class for other purposes, like for example a dice.

In [5]:
# Each die is a separate Choice descriptor holding the six faces.
class Dice:
    die_1 = Choice(1, 2, 3, 4, 5, 6)
    die_2 = Choice(1, 2, 3, 4, 5, 6)
    die_3 = Choice(1, 2, 3, 4, 5, 6)

# There is no seed() call in this cell, so the rolls continue from whatever
# state the random module was left in.
d = Dice()
for _ in range(3):
    print(d.die_1, d.die_2, d.die_3)

5 6 5
2 3 1
6 1 6


### Getters and setters

#### The \__get__ method
- signature is as follows:
    * self
    * instance
    * owner_class

- we can return different values from \__get__ depending on:
    * called from class
    * called from instance

- very often we choose to:
    * return the descriptor instance when called from the class itself
        * gives us an easy handle to the descriptor instance


#### The \__set__ method
- the \__set__ signature is as follows: self, instance, value
    * self: this references the descriptor instance, like any regular  method
    * instance: the instance the \__set__ method was called from
    * value: the value we want to assign to the attribute
- there is no need to have an owner class for the \__set__ method because setters (and deleters) are always called from instances.

In [6]:
class TimeUTC:
    """Non-data descriptor that traces each __get__ call and returns the UTC time."""

    def __get__(self, instance, owner_class):
        """Print the arguments received, then return the current UTC time.

        ``instance`` is None when the attribute is read from the class.
        ``datetime.utcnow()`` is deprecated since Python 3.12, so an aware
        ``datetime.now(timezone.utc)`` is used and its tzinfo dropped, which
        keeps the offset-free ISO-8601 string format.
        """
        print(f'__get__ called, self={self}, instance={instance}, owner_class={owner_class}')
        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

# Two owner classes, each holding its own TimeUTC descriptor instance.
class Logger1:
    current_time = TimeUTC()

class Logger2:
    current_time = TimeUTC()

# notice that instance=None because we are calling this method from the class
print(Logger1.current_time, end='\n\n')

# read through an instance: __get__ now receives that instance
l1 = Logger1()
l1.current_time

__get__ called, self=<__main__.TimeUTC object at 0x000002042BA28BB0>, instance=None, owner_class=<class '__main__.Logger1'>
2021-06-19T18:56:49.880455

__get__ called, self=<__main__.TimeUTC object at 0x000002042BA28BB0>, instance=<__main__.Logger1 object at 0x000002042BA28610>, owner_class=<class '__main__.Logger1'>


'2021-06-19T18:56:49.880455'

In [7]:
class TimeUTC:
    """Non-data descriptor returning the UTC time for instances, itself for the class."""

    def __get__(self, instance, owner_class):
        """Return the descriptor on class access, the current UTC time otherwise.

        ``datetime.utcnow()`` is deprecated since Python 3.12, so an aware
        ``datetime.now(timezone.utc)`` is used and its tzinfo dropped, which
        keeps the offset-free ISO-8601 string format.
        """
        if instance is None:
            # accessed as OwnerClass.attr: hand back the descriptor object
            return self
        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

class Logger:
    current_time = TimeUTC()

# class access prints the descriptor object itself
print(Logger.current_time)
# instance access evaluates to the timestamp string
l = Logger()
l.current_time

<__main__.TimeUTC object at 0x000002042BA28CD0>


'2021-06-19T18:56:50.064974'

The above "functionality" is consistent with how a property defined in a class behaves depending on whether it is accessed from the class or from an instance. Below, when current_time is accessed from the class the property object is returned, while the timestamp string is returned when current_time is accessed from an instance.

In [8]:
class Logger:
    """Logger exposing the current UTC time through a read-only property."""

    @property
    def current_time(self):
        """Current UTC time as an ISO-8601 string without an offset.

        ``datetime.utcnow()`` is deprecated since Python 3.12, so an aware
        ``datetime.now(timezone.utc)`` is used and its tzinfo dropped, which
        keeps the same string format.
        """
        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

# class access prints the property object; instance access evaluates the getter
print(Logger.current_time)
l = Logger()
l.current_time

<property object at 0x000002042BA23770>


'2021-06-19T18:56:50.400102'

In [9]:
class TimeUTC:
    """Non-data descriptor that reports which descriptor object served the read."""

    def __get__(self, instance, owner_class):
        """Return the descriptor on class access; otherwise trace and return UTC time.

        ``datetime.utcnow()`` is deprecated since Python 3.12, so an aware
        ``datetime.now(timezone.utc)`` is used and its tzinfo dropped, which
        keeps the offset-free ISO-8601 string format.
        """
        if instance is None:
            return self
        # the printed repr of self shows which descriptor object handled the read
        print(f'__get__ called in {self}')
        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

class Logger:
    current_time = TimeUTC()

# two separate Logger instances
l1 = Logger()
l2 = Logger()


# both reads go through the single TimeUTC object stored on the Logger class
l1.current_time, l2.current_time

__get__ called in <__main__.TimeUTC object at 0x000002042B9D0310>
__get__ called in <__main__.TimeUTC object at 0x000002042B9D0310>


('2021-06-19T18:56:50.589618', '2021-06-19T18:56:50.589618')

In the code above we create a class Logger with current_time as a class variable. \__get__ is called on the same descriptor object for both instances. This is not a problem when we only return the current UTC timestamp, but it turns into a problem when we want to store state, as below. The two instances share the same state because countdown is a class variable, so there is only a single Countdown object.

In [10]:
class Countdown:
    """Descriptor that hands out a decreasing counter, one step per instance read.

    The counter is kept on the descriptor object itself, so every instance of
    the owning class draws from the same sequence.
    """

    def __init__(self, start):
        # stored one above `start` because __get__ decrements before returning
        self.start = start + 1

    def __get__(self, instance, owner):
        if instance is not None:
            self.start = self.start - 1
            return self.start
        # class-level access: return the descriptor itself
        return self

class Rocket:
    # a single Countdown object, stored on the class
    countdown = Countdown(10)

rocket1 = Rocket()
rocket2 = Rocket()

# reads alternate between the two rockets, yet the numbers keep decreasing:
# both instances use the same Countdown object
print(rocket1.countdown)
print(rocket2.countdown)
print(rocket1.countdown)

10
9
8


#### the \__set__ method

In [11]:
class IntegerValue:
    """Data descriptor that keeps the assigned value on the descriptor itself.

    The value is attached to `self` (the descriptor), so it is shared by every
    instance of the owning class. No conversion or validation is performed.
    """

    def __set__(self, instance, value):
        # `instance` is ignored: the value lands on the descriptor object
        self._value = value

    def __get__(self, instance, owner_class):
        if instance is not None:
            return self._value
        # class-level access: return the descriptor itself
        return self

class Point2D:
    # one descriptor object per coordinate, stored on the class
    x = IntegerValue()
    y = IntegerValue()

p1 = Point2D()
p1.x = 1.1
p1.y = 2.2
print(f'p1.x={p1.x}, p1.y={p1.y}')

p2 = Point2D()
p2.x = 3.3
p2.y = 6.6
print(f'p2.x={p2.x}, p2.y={p2.y}')

# now p1.x and p1.y have changed
print(f'p1.x={p1.x}, p1.y={p1.y}')

p1.x=1.1, p1.y=2.2
p2.x=3.3, p2.y=6.6
p1.x=3.3, p1.y=6.6


Above, p1.x and p1.y changed when we set the values for p2.x and p2.y. The reason for this is that we are sharing the same instance of our descriptor, so it doesn't really matter if IntegerValue is called from p1 or p2.

### Instance properties

In [12]:
class IntegerValue:
    """Data descriptor that stores its value on the owning instance.

    The value is kept in an instance attribute named '_' + name, where `name`
    is the string passed to the constructor. No conversion is performed.
    """

    def __init__(self, name):
        self.storage_name = '_' + name

    def __set__(self, instance, value):
        # the value goes onto the instance that performed the assignment
        target_attr = self.storage_name
        setattr(instance, target_attr, value)

    def __get__(self, instance, owner_class):
        if instance is not None:
            # the third getattr argument is the default: None until a value is set
            return getattr(instance, self.storage_name, None)
        return self

class Point2D:
    # the attribute name has to be repeated as the constructor argument
    x = IntegerValue('x')
    y = IntegerValue('y')

p1, p2 = Point2D(), Point2D()

p1.x = 10.1
p1.y = 20.2

p2.x = 30.1
p2.y = 60.2

# each instance keeps its own '_x' / '_y' entries in its __dict__;
# p1 is printed a second time to show it was not affected by the p2 assignments
print('p1.__dict__', p1.__dict__)
print('p2.__dict__', p2.__dict__)
print('p1.__dict__', p1.__dict__)


p1.__dict__ {'_x': 10.1, '_y': 20.2}
p2.__dict__ {'_x': 30.1, '_y': 60.2}
p1.__dict__ {'_x': 10.1, '_y': 20.2}


**Notice that in the method above, we store the data in the instances of Point2D, and NOT in the IntegerValue class**


This method may work well, but there are some drawbacks:
- we have to include the 'x' and 'y' as arguments to IntegerValue like this: IntegerValue('x')
- if IntegerValue is intended to be used by others, we don't know if they already have a '_x' or '_y' in their class.
- if the user's Point2D class uses \__slots__, its instances cannot accept new attributes such as '_x' or '_y' unless those names are declared in the slots

In [13]:
# one descriptor object holds one attribute's value for any number of instances
class IntegerValue:
    """Data descriptor that keeps per-instance values in a dict on the descriptor.

    The owning instance itself is the dict key, and assigned values are
    converted with int().
    """

    def __init__(self):
        self.values = {}

    def __set__(self, instance, value):
        as_int = int(value)
        self.values[instance] = as_int

    def __get__(self, instance, owner_class):
        if instance is not None:
            # dict.get gives None for an instance that never had a value assigned
            return self.values.get(instance)
        return self

class Point2D:
    x = IntegerValue()
    y = IntegerValue()

p1 = Point2D()
p2 = Point2D()
p1.x, p1.y = 10.1, 20.2
p2.x, p2.y = 13.1, 30.2

print(f'p1.x={p1.x}, p1.y={p1.y}')
# NOTE(review): the '21.y=' label looks like a typo for 'p2.y='; it is left
# unchanged because the recorded output of this cell contains the same text.
print(f'p2.x={p2.x}, 21.y={p2.y}', end='\n\n')

# access the dict for x and y
# (class-level access returns the descriptor object, whose __dict__ is printed)
print('Point2D.x.__dict__', Point2D.x.__dict__)
print('Point2D.y.__dict__', Point2D.y.__dict__, end='\n\n')

# access the value dict
print('Point2D x values', Point2D.x.values)
print('Point2D y values', Point2D.y.values)

p1.x=10, p1.y=20
p2.x=13, 21.y=30

Point2D.x.__dict__ {'values': {<__main__.Point2D object at 0x000002042B8CDD30>: 10, <__main__.Point2D object at 0x000002042B8CDC40>: 13}}
Point2D.y.__dict__ {'values': {<__main__.Point2D object at 0x000002042B8CDD30>: 20, <__main__.Point2D object at 0x000002042B8CDC40>: 30}}

Point2D x values {<__main__.Point2D object at 0x000002042B8CDD30>: 10, <__main__.Point2D object at 0x000002042B8CDC40>: 13}
Point2D y values {<__main__.Point2D object at 0x000002042B8CDD30>: 20, <__main__.Point2D object at 0x000002042B8CDC40>: 30}


The method above:
- assumes that the instances stored in the values dict are hashable
- causes a memory leak because there is a strong reference to the object stored in the values dictionary

In [14]:
# even if we delete p1
# (del only removes the name; the values dicts still hold the object as a key)
del p1

# We can still access it like this:
# (index 0 is the first key that was inserted into the x descriptor's dict)
p1 = list(Point2D.x.values.keys())[0]
p1, p1.x, p1.y

(<__main__.Point2D at 0x2042b8cdd30>, 10, 20)

#### Strong and weak references

Weak reference
- a reference to an object that does not affect the reference count as far as the memory manager is concerned.

In [15]:
import weakref
import ctypes

def ref_count(address):
    """Return the integer found at `address`, read as a C long.

    The notebook passes id(obj) here to inspect an object's reference count.

    NOTE(review): this assumes the interpreter keeps the reference count at the
    start of the object and that id() is its memory address - confirm for the
    Python build in use. c_long may also be narrower than the real counter field.
    """
    counter = ctypes.c_long.from_address(address)
    return counter.value

In [16]:
class IntegerValue:
    """Data descriptor storing int values per instance in a WeakKeyDictionary.

    The dictionary references its keys weakly, so an entry does not keep the
    owning instance alive.
    """

    def __init__(self):
        self.values = weakref.WeakKeyDictionary()

    def __set__(self, instance, value):
        as_int = int(value)
        self.values[instance] = as_int

    def __get__(self, instance, owner_class):
        if instance is not None:
            # None for an instance that has no entry
            return self.values.get(instance)
        return self

class Point:
    x = IntegerValue()



p = Point()
p.x = 100.3

# keyrefs() lists the weak references used as dictionary keys:
# one entry while p exists, none after p has been deleted
print(Point.x.values.keyrefs())
del p
print(Point.x.values.keyrefs())

[<weakref at 0x000002042BA39810; to 'Point' at 0x000002042BA320A0>]
[]


- Now we don't need to store the values in the instance
- We are protected from memory leaks
- BUT it only works for hashable objects

In [17]:
class IntegerValue:
    """Data descriptor storing int values in a plain dict keyed by id(instance).

    Using id() as the key means the owning instance does not have to be
    hashable. Nothing removes an entry when its instance goes away.
    """

    def __init__(self):
        self.values = {}

    def __set__(self, instance, value):
        key = id(instance)
        self.values[key] = int(value)

    def __get__(self, instance, owner_class):
        if instance is not None:
            # None when no value was stored under this instance's id
            return self.values.get(id(instance))
        return self

class Point:
    x = IntegerValue()

    def __init__(self, x):
        # assignment goes through IntegerValue.__set__
        self.x = x
    
    # defining __eq__ without __hash__ makes Point instances unhashable
    def __eq__(self, other):
        return isinstance(other, Point) and self.x == other.x

p = Point(10.2)
print(id(p), Point.x.values)
# keep the id so the same address can still be inspected after p is deleted
p_id = id(p)
print('ref count of p: ', ref_count(p_id))
del p

2216934891040 {2216934891040: 10}
ref count of p:  1


In [18]:
# p no longer exists, so p_id is only a remembered address.
# NOTE(review): what is read from the address of a deleted object is not
# guaranteed to be meaningful - treat the printed number as illustrative.
print('ref count of p: ', ref_count(p_id))

ref count of p:  0


In [19]:
# The issue now is that the entry stored under the id of the deleted "p" is
# still in the dictionary: nothing removed it when the object went away
Point.x.values

{2216934891040: 10}

In [37]:
def obj_destroyed(obj):
    """Print a one-line message saying that `obj` is destroyed."""
    message = f'{obj} is destroyed'
    print(message)

class IntegerValue:
    """Data descriptor storing int values per instance, keyed by id(instance).

    Each entry is a ``(weak reference, value)`` tuple. The weak reference is
    created with a callback that removes the entry once the owning instance is
    garbage-collected, so no stale ids are left behind. Owning instances must
    be weak-referenceable; they do not need to be hashable.
    """

    def __init__(self):
        # id(instance) -> (weakref to instance, stored int)
        self.values = {}

    def __set__(self, instance, value):
        """Store ``int(value)`` for ``instance`` with a clean-up weak reference."""
        self.values[id(instance)] = (weakref.ref(instance, self._remove_object), int(value))

    def __get__(self, instance, owner_class):
        """Return the descriptor on class access, else the stored value.

        An attribute that was never assigned reads as None instead of raising
        a KeyError that exposes the internal id-based key.
        """
        if instance is None:
            return self
        entry = self.values.get(id(instance))
        return None if entry is None else entry[1]

    def _remove_object(self, weak_ref):
        """Weak-reference callback: drop the entry that holds ``weak_ref``."""
        for key, (stored_ref, _) in self.values.items():
            if stored_ref is weak_ref:
                del self.values[key]
                # stop at once: the dict must not be iterated after the deletion
                break


class Point:
    x = IntegerValue()


# assign through the descriptor, then read the value back
p = Point()
p.x = 10.1
p.x

10

In [38]:
# inspect the descriptor's storage while p still exists
Point.x.values

{2216964771312: (<weakref at 0x000002042D2E5D60; to 'Point' at 0x000002042D65CDF0>,
  10)}

In [39]:
# remove the name p that was bound to the Point instance
del p

In [40]:
# inspect the descriptor's storage again, now that p has been deleted
Point.x.values

{}

- We now have solved the issue with cleaning up any references to the deleted object
- But the instances must still support weak references: a class that defines \__slots__ has to list '\__weakref__' among its slots, as the classes in the next example do

In [44]:
class ValidString:
    """Data descriptor for a length-checked string, stored on the descriptor.

    Values are kept in ``self.data`` under ``id(instance)`` as
    ``(weak reference, value)`` tuples. The weak reference carries a callback
    that removes the entry when the owning instance is garbage-collected.
    Owning instances must be weak-referenceable; they need neither a
    ``__dict__`` nor to be hashable.
    """

    def __init__(self, min_length=0, max_length=255):
        # id(instance) -> (weakref to instance, validated string)
        self.data = {}
        self._min_length = min_length
        self._max_length = max_length

    def __set__(self, instance, value):
        """Validate ``value`` and store it for ``instance``.

        Raises:
            ValueError: if ``value`` is not a str, is shorter than the minimum
                length or longer than the maximum length.
        """
        if not isinstance(value, str):
            raise ValueError('Value must be a string')
        if len(value) < self._min_length:
            raise ValueError(f'Value should be at least {self._min_length} characters')
        if len(value) > self._max_length:
            raise ValueError(f'Value cannot exceed {self._max_length}')
        self.data[id(instance)] = (weakref.ref(instance, self._finalize_instance), value)

    def __get__(self, instance, owner_class):
        """Return the descriptor on class access, else the stored string.

        An attribute that was never assigned reads as None. Previously the
        None returned by ``dict.get`` was subscripted, which raised TypeError.
        """
        if instance is None:
            return self
        value_tuple = self.data.get(id(instance))
        if value_tuple is None:
            return None
        return value_tuple[1]

    def _finalize_instance(self, weak_ref):
        """Weak-reference callback: drop the entry that holds ``weak_ref``."""
        for key, (stored_ref, _) in self.data.items():
            if stored_ref is weak_ref:
                del self.data[key]
                # stop at once: the dict must not be iterated after the deletion
                break


class Person:
    """Slotted person record whose names are validated by ValidString descriptors."""

    # '__weakref__' is the only slot: instances get no __dict__, but can still
    # be the target of weak references
    __slots__ = '__weakref__'

    first_name = ValidString(1, 100)
    last_name = ValidString(1, 100)

    def __eq__(self, other):
        """Two Person objects are equal when both of their names match."""
        if not isinstance(other, Person):
            return False
        same_first = self.first_name == other.first_name
        return same_first and self.last_name == other.last_name

class BankAccount:
    """Slotted account record with a ValidString-checked account number."""

    # '__weakref__' is the only slot: instances get no __dict__, but can still
    # be the target of weak references
    __slots__ = '__weakref__'

    account_number = ValidString(5, 25)

    def __eq__(self, other):
        """Two BankAccount objects are equal when their account numbers match."""
        if not isinstance(other, BankAccount):
            return False
        return self.account_number == other.account_number

p1 = Person()
p2 = Person()
p1.first_name, p1.last_name = 'Guido', 'van Rossum'
p2.first_name, p2.last_name = 'Raymond', 'Hettinger'

print(f'{p1.first_name} {p1.last_name}')
print(f'{p2.first_name} {p2.last_name}')
# class-level access returns the descriptor, so .data shows each descriptor's store
print(Person.first_name.data)
print(Person.last_name.data)

Guido van Rossum
Raymond Hettinger
{2216952761072: (<weakref at 0x000002042D86D770; to 'Person' at 0x000002042CAE8AF0>, 'Guido'), 2216959815584: (<weakref at 0x000002042D29FE00; to 'Person' at 0x000002042D1A2FA0>, 'Raymond')}
{2216952761072: (<weakref at 0x000002042D851590; to 'Person' at 0x000002042CAE8AF0>, 'van Rossum'), 2216959815584: (<weakref at 0x000002042D2E59F0; to 'Person' at 0x000002042D1A2FA0>, 'Hettinger')}


#### The \__set_name__ method

In [60]:
class ValidString:
    """Data descriptor for a string with an optional minimum length.

    The attribute name is captured in __set_name__, and the value is stored on
    the owning instance under '_' + that name.
    """

    def __init__(self, min_length=None):
        self.min_length = min_length

    def __set_name__(self, owner_class, property_name):
        # remember the name this descriptor was assigned to in the class body
        self.property_name = property_name

    def __set__(self, instance, value):
        """Validate `value` and store it; raises ValueError when a check fails."""
        if not isinstance(value, str):
            raise ValueError(f'{self.property_name} must be of type String')
        too_short = self.min_length is not None and len(value) < self.min_length
        if too_short:
            raise ValueError(f'{self.property_name} must be at least {self.min_length} characters')
        setattr(instance, '_' + self.property_name, value)

    def __get__(self, instance, owner_class):
        if instance is not None:
            # None until a value has been stored on the instance
            return getattr(instance, '_' + self.property_name, None)
        return self

class Person:
   
    # no name argument is needed: __set_name__ receives 'first_name' / 'last_name'
    first_name = ValidString(2)
    last_name = ValidString(2)


p = Person()

# both values satisfy the checks, so the except branch is not taken here
try:
    p.first_name = 'Guido'
    p.last_name = 'van Rossum'
except ValueError as e:
    print(e)


print(f'{p.first_name} {p.last_name}')
# the values are stored on the instance under underscore-prefixed keys
print('p.__dict__ : ', p.__dict__)

Guido van Rossum
p.__dict__ :  {'_first_name': 'Guido', '_last_name': 'van Rossum'}


In [61]:
class ValidString:
    """Data descriptor for a string with an optional minimum length.

    The attribute name is captured in __set_name__, and the value is written
    straight into the owning instance's __dict__ under that same name.
    """

    def __init__(self, min_length=None):
        self.min_length = min_length

    def __set_name__(self, owner_class, property_name):
        # remember the name this descriptor was assigned to in the class body
        self.property_name = property_name

    def __set__(self, instance, value):
        """Validate `value` and store it; raises ValueError when a check fails."""
        if not isinstance(value, str):
            raise ValueError(f'{self.property_name} must be of type String')
        too_short = self.min_length is not None and len(value) < self.min_length
        if too_short:
            raise ValueError(f'{self.property_name} must be at least {self.min_length} characters')
        # writing to __dict__ directly does not go through __set__ again
        storage = instance.__dict__
        storage[self.property_name] = value

    def __get__(self, instance, owner_class):
        if instance is not None:
            # read from the instance dictionary; None when nothing is stored
            return instance.__dict__.get(self.property_name)
        return self

class Person:
   
    first_name = ValidString(2)
    last_name = ValidString(2)


p = Person()

# both values satisfy the checks, so the except branch is not taken here
try:
    p.first_name = 'Guido'
    p.last_name = 'van Rossum'
except ValueError as e:
    print(e)


print(f'{p.first_name} {p.last_name}')
# the instance dictionary keys are now the same as the attribute names
print('p.__dict__ : ', p.__dict__)

Guido van Rossum
p.__dict__ :  {'first_name': 'Guido', 'last_name': 'van Rossum'}


#### Property lookup resolution:
- data descriptors (both \__get\__ and \__set__ are defined):
    * always override the instance dictionary (by default - can override this behavior)
    * see below

In [62]:
class IntegerValue:
    """Data descriptor stub that only reports when its hooks are invoked.

    Nothing is stored: __set__ discards the value and __get__ returns None.
    """

    def __set__(self, instance, value):
        notice = '__set__ called'
        print(notice)

    def __get__(self, instance, owner_class):
        notice = '__get__ called'
        print(notice)

class Point:
    x = IntegerValue()

p = Point()
# the assignment is handled by the descriptor's __set__ ...
p.x = 100
# ... and the read by its __get__
p.x

__set__ called
__get__ called


#### Properties and descriptors (**properties ARE descriptors**)

In [72]:
from numbers import Integral

class Person:
    """Person whose `age` is validated through a decorator-style property."""

    @property
    def age(self):
        # None until an age has been assigned
        return getattr(self, '_age', None)

    @age.setter
    def age(self, value):
        is_integer = isinstance(value, Integral)
        if not is_integer:
            raise ValueError('age: must be an integer')
        is_negative = value < 0
        if is_negative:
            raise ValueError('age: must be a non-negative integer')
        self._age = value

# THIS IS EQUIVALENT TO:

class Person:
    """Person whose `age` property is assembled with an explicit property() call."""

    def get_age(self):
        # None until an age has been assigned
        return getattr(self, '_age', None)

    def set_age(self, value):
        is_integer = isinstance(value, Integral)
        if not is_integer:
            raise ValueError('age: must be an integer')
        is_negative = value < 0
        if is_negative:
            raise ValueError('age: must be a non-negative integer')
        self._age = value

    # bind the two plain functions into one property object
    age = property(fget=get_age, fset=set_age)

# class-level access yields the property object itself; check which descriptor
# methods it provides
prop = Person.age
print('has __get__?: ', hasattr(prop, '__get__'))
print('has __set__?: ', hasattr(prop, '__set__'))

has __get__?:  True
has __set__?:  True


In [75]:
class MakeProperty:
    """Small property-like data descriptor driven by getter and setter callables.

    Each hook prints a trace line so the order of the calls can be observed.
    """

    def __init__(self, fget=None, fset=None):
        self.fget, self.fset = fget, fset

    def __set_name__(self, owner_class, prop_name):
        print(f'__set_name__: owner={owner_class}, property_name={prop_name}')
        # kept for the error messages raised by __get__ and __set__
        self.prop_name = prop_name

    def __get__(self, instance, owner_class):
        """Return the descriptor on class access, else the result of fget(instance).

        Raises AttributeError when no getter was supplied.
        """
        print('__get__ called')
        if instance is None:
            return self
        getter = self.fget
        if getter is None:
            raise AttributeError(f'{self.prop_name} is not readable')
        return getter(instance)

    def __set__(self, instance, value):
        """Forward the assignment to fset; raises AttributeError without a setter."""
        print('__set__called')
        setter = self.fset
        if setter is None:
            raise AttributeError(f'{self.prop_name} is not writable')
        setter(instance, value)

class Person:
    def get_name(self):
        return getattr(self, '_name', None)
    
    def set_name(self, value):
        self._name = value

    # the two plain functions above are handed to the descriptor as getter and setter
    name = MakeProperty(fget=get_name, fset=set_name)

# the assignment and the read each go through the matching descriptor method
p = Person()
p.name ='Guido'
p.name

__set_name__: owner=<class '__main__.Person'>, property_name=name
__set__called
__get__ called


'Guido'

#### Application: Example 1

In [83]:
class ValidType:
    """Data descriptor that only accepts values passing isinstance(value, type_).

    The attribute name is captured in __set_name__, and the value is stored in
    the owning instance's __dict__ under that name.
    """

    def __init__(self, type_):
        self._type = type_

    def __set_name__(self, owner_class, property_name):
        self.property_name = property_name

    def __set__(self, instance, value):
        """Store `value` if it has the required type, otherwise raise ValueError."""
        if isinstance(value, self._type):
            instance.__dict__[self.property_name] = value
            return
        raise ValueError(f'{self.property_name} must be of type {self._type.__name__}')

    def __get__(self, instance, owner_class):
        if instance is not None:
            # None when nothing has been stored for this attribute yet
            return instance.__dict__.get(self.property_name, None)
        return self

# one reusable descriptor class, configured with a different type per attribute
class Person:
    age = ValidType(int)
    height = ValidType(float)
    tags = ValidType(list)

p = Person()
# 30.5 is a float, so the int check in ValidType.__set__ rejects it
try:
    p.age = 30.5
except ValueError as e:
    print(e)


age must be of type int
