In [62]:
# hash
# __hash__
#def __hash__
# sys.hash_info.width
# sys.hash_info.modulus

In [1]:
t1 = (1,2,3)
t2 = (1,2,3)

In [2]:
t1==t2

True

In [3]:
t1 is t2

False

In [4]:
d = {t1 : 100}

In [5]:
d[t1]

100

In [6]:
d[t2]

100

In [9]:
hash(t1) ==  hash(t2)

True

In [10]:
#One of the basic premises of hashes is that if two objects compare equal, 
# they must have the same hash.

In [11]:
# A dict lookup succeeds only if the lookup key has the same hash as a stored key
# and also compares equal (==) to that stored key.

In [12]:
# creating hashes of custom objects - by default Python derives the hash from the object's identity (its memory address in CPython)

In [13]:
class Person:
    """A person record with a name and an age.

    Defines neither ``__eq__`` nor ``__hash__``, so instances keep the
    defaults inherited from ``object``: two distinct instances never
    compare equal, even when their attributes match.
    """

    def __init__(self, name, age):
        """Store ``name`` and ``age`` on the new instance."""
        self.name, self.age = name, age

    def __repr__(self):
        """Return a ``Person(name=..., age=...)`` description of the instance."""
        return f'Person(name={self.name}, age={self.age})'

In [14]:
p1 = Person('ak', 12)
p2 = Person('bk', 21)

In [15]:
id(p1), id(p2)

(2519153851848, 2519153850824)

In [16]:
d= {p1: 'Person object 1', p2: 'Person object 2'}

In [17]:
d[p1]

'Person object 1'

In [18]:
d= {p1: str(p1), p2: str(p2)}

In [19]:
d[p1]

'Person(name=ak, age=12)'

In [20]:
d[p2]

'Person(name=bk, age=21)'

In [21]:
# a different object with the same attribute values won't work as a lookup key.
# remember: a lookup first needs equal hashes, and by default the hash (and ==) depends on the object's identity (id)


In [22]:
p3 = Person('ak', 12)

In [24]:
p1 is p3

False

In [25]:
# therefore hash not equal


In [27]:
d[p3] # not equal to p1

KeyError: Person(name=ak, age=12)

In [28]:
# to define a custom hash function in a class.

In [29]:
class Person:
    """A person whose equality and hash are based on ``(name, age)``.

    Two instances with the same name and age compare equal and produce the
    same hash, so either one can be used to set or retrieve the same dict
    entry.
    """

    def __init__(self, name, age):
        """Store ``name`` and ``age`` on the new instance."""
        self.name = name
        self.age = age

    def __repr__(self):
        """Return a ``Person(name=..., age=...)`` description of the instance."""
        return f'Person(name={self.name}, age={self.age})'

    def __eq__(self, other):
        """Compare by ``name`` and ``age`` when ``other`` is a ``Person``.

        Returns ``NotImplemented`` for any other type so that Python can try
        the other operand's reflected comparison; if that is not available
        either, ``==`` falls back to identity and evaluates to ``False``.
        """
        if isinstance(other, Person):
            return self.name == other.name and self.age == other.age
        return NotImplemented

    def __hash__(self):
        """Return the hash of the ``(name, age)`` tuple.

        Prints a trace line on every call so the notebook shows when a hash
        is actually requested (e.g. on dict insertion and lookup).
        """
        print('__hash__ called...')
        return hash((self.name, self.age))

In [30]:
# this way python won't use id to create hash as it does by default,
# the objects hash will depend on tuple of name and age.

In [31]:
# so objects with the same name and age will have the same hash and therefore
# can be used interchangeably as dict keys, to set or retrieve values.

In [32]:
p1 = ('ak', 12)
p2 = ('bk', 21)

In [33]:
d= {p1: str(p1), p2: str(p2)}

In [34]:
d[p1]

"('ak', 12)"

In [35]:
d[p2]

"('bk', 21)"

In [36]:
p3 = ('ak',12)

In [37]:
d[p3]

"('ak', 12)"

In [38]:
# We can make a custom class unhashable by setting __hash__ = None

In [39]:
class Person:
    """A person record that is explicitly marked as unhashable.

    ``__hash__`` is set to ``None`` on the class, so calling ``hash()`` on an
    instance raises ``TypeError``.
    """

    # Setting __hash__ to None is what makes instances unhashable.
    __hash__ = None

    def __init__(self, name, age):
        """Store ``name`` and ``age`` on the new instance."""
        self.name, self.age = name, age

    def __repr__(self):
        """Return a ``Person(name=..., age=...)`` description of the instance."""
        return f'Person(name={self.name}, age={self.age})'

In [40]:
hash(Person('John', 78))

TypeError: unhashable type: 'Person'

In [41]:
#The `__hash__` method must return an integer - 
#Python will complain otherwise:

In [42]:
class Test:
    def __hash__(self):
        return 'a string'

In [43]:
hash(Test())

TypeError: __hash__ method should return an integer

In [44]:
import sys
sys.hash_info.width

64

When we call the `hash()` function, although it in turn calls the `__hash__` method, it does something more.

It will truncate the integer returned by `__hash__` to a certain width which is implementation dependent.

In [45]:
class Test:
    """Demo class whose ``__hash__`` returns the fixed integer 10**18."""

    def __hash__(self):
        """Return 1_000_000_000_000_000_000 (10 to the power 18)."""
        return 10 ** 18

In [46]:
hash(Test())

1000000000000000000

In [49]:
class Test:
    """Demo class whose ``__hash__`` returns the fixed integer 10**19."""

    def __hash__(self):
        """Return 10_000_000_000_000_000_000 (10 to the power 19)."""
        # 10 times bigger than the 10**18 value used in the previous example
        return 10 ** 19

In [50]:
hash(Test())

776627963145224196

In [51]:
# python uses mod to reduce its width

In [52]:
mod = sys.hash_info.modulus

In [53]:
mod

2305843009213693951

In [57]:
10_000_000_000_000_000_000% mod

776627963145224196

In [58]:
10_000_000_000_000_000_000 % mod == hash(Test())

True

As we have seen many times now, hash functions and hashable objects need to satisfy these conditions:
1. if a == b then hash(a) == hash(b)
2. hash(a) must be an integer

But nothing specifies here that unequal objects must result in unequal hashes.

The only issue with equal hashes for unequal objects is that we end up getting more collisions when looking up a key in a dictionary (refer to the earlier theory section if you want more details on this).

In [59]:
# it makes it more inefficient

In [61]:
# check out the examples in the course book