# 1) What is hashing?

Hashing maps data of arbitrary size (a key) to a fixed-size integer called a hash.
A hash table uses this integer (mod table size) as an index to store/retrieve values in O(1) time on average (insertions are amortized O(1) because of occasional resizes).

##### Hash function properties (for DSA):

Deterministic: same input → same output during a run.

Uniform-ish: spreads keys across buckets evenly.

Efficient: cheap to compute — O(1) for fixed-size keys, O(k) for a key of length k such as a string.

Low collisions: different keys rarely map to the same index.

# 2) Collisions and how to handle them

Two different keys can land in the same bucket → collision. Strategies:

###### a) Separate Chaining

Each bucket holds a small list (or linked list) of key–value pairs.

Insert/search only within that list.

###### b) Open Addressing

Store entries directly in the array.

On collision, probe other slots:

Linear probing: try i, i+1, i+2, …

Quadratic probing: try i + 1², i + 2², …

Double hashing: second hash decides step size.

Load factor (α) = items / capacity

Keep α below a threshold (≈ 0.5–0.75) to keep probes/lists short.

If α too high → resize (allocate bigger table and reinsert).

# 3) A minimal hash table in Python (two versions)

#### 3.1 Separate Chaining (simple, robust)

In [2]:
class HashMapChaining:
    """Hash map that resolves collisions with separate chaining.

    Every bucket is a list of ``(key, value)`` tuples. The bucket array
    doubles whenever adding a *new* key would push ``items / capacity`` above
    ``load_factor``; overwriting an existing key never triggers a resize.
    """

    def __init__(self, initial_capacity=8, load_factor=0.75):
        """Create an empty map.

        Args:
            initial_capacity: Number of buckets to start with; must be >= 1.
            load_factor: Maximum ``items / capacity`` ratio tolerated before
                the table grows; must be > 0.

        Raises:
            ValueError: If ``initial_capacity`` < 1 or ``load_factor`` <= 0.
        """
        if initial_capacity < 1:
            raise ValueError("initial_capacity must be at least 1")
        if load_factor <= 0:
            raise ValueError("load_factor must be positive")
        self._n = 0
        self._cap = initial_capacity
        self._load_factor = load_factor
        self._buckets = [[] for _ in range(self._cap)]

    def _index(self, key):
        """Return the bucket index of ``key`` for the current capacity."""
        mask = self._cap - 1
        # A power-of-two capacity lets a bit mask stand in for the modulo.
        if (self._cap & mask) == 0:
            return hash(key) & mask
        return hash(key) % self._cap

    def _resize(self):
        """Double the capacity and redistribute every stored pair."""
        old = self._buckets
        self._cap *= 2
        self._buckets = [[] for _ in range(self._cap)]
        for bucket in old:
            for pair in bucket:
                # Stored keys are already unique, so each pair can be appended
                # directly without a duplicate scan; the item count is unchanged.
                self._buckets[self._index(pair[0])].append(pair)

    def __setitem__(self, key, value):
        """Insert ``key`` or overwrite its value if it is already present."""
        bucket = self._buckets[self._index(key)]
        for i, (k, _) in enumerate(bucket):
            if k == key:
                bucket[i] = (key, value)
                return
        # Only a genuinely new key can raise the load factor.
        if self._n + 1 > self._cap * self._load_factor:
            self._resize()
            bucket = self._buckets[self._index(key)]
        bucket.append((key, value))
        self._n += 1

    def __getitem__(self, key):
        """Return the value stored for ``key``; raise ``KeyError`` if absent."""
        for k, v in self._buckets[self._index(key)]:
            if k == key:
                return v
        raise KeyError(key)

    def __delitem__(self, key):
        """Remove ``key``; raise ``KeyError`` if absent."""
        bucket = self._buckets[self._index(key)]
        for i, (k, _) in enumerate(bucket):
            if k == key:
                bucket.pop(i)
                self._n -= 1
                return
        raise KeyError(key)

    def __contains__(self, key):
        """Return ``True`` when ``key`` is stored in the map."""
        return any(k == key for k, _ in self._buckets[self._index(key)])

    def __len__(self):
        """Return the number of stored key-value pairs."""
        return self._n

# Demo: insert two entries, then look one up, test membership, delete, and
# check the remaining size.
hm = HashMapChaining()
hm["name"] = "Alice"
hm["age"] = 25

print(hm["name"])      # Alice
print("age" in hm)     # True

del hm["age"]
print(len(hm))         # 1


Alice
True
1


#### 3.2 Open Addressing (linear probing)

In [3]:
class HashMapOpenAddressing:
    """Hash map that stores entries in the table itself, using linear probing.

    Deleted entries leave a tombstone so that probe chains running through
    them stay intact. Tombstones count toward table occupancy: when they crowd
    the table it is rehashed in place, and when live entries exceed the load
    factor the capacity doubles. Probe sequences are bounded to one pass over
    the table, so every operation terminates even if no empty slot is left.
    """

    _EMPTY = object()    # slot never used; ends a probe sequence
    _DELETED = object()  # tombstone; probing continues past it

    def __init__(self, initial_capacity=8, load_factor=0.5):
        """Create an empty map.

        Args:
            initial_capacity: Number of slots to start with; must be >= 1.
            load_factor: Maximum fraction of slots that may be occupied
                before the table is rebuilt; must satisfy 0 < value <= 1.

        Raises:
            ValueError: If either argument is outside its valid range.
        """
        if initial_capacity < 1:
            raise ValueError("initial_capacity must be at least 1")
        if not 0 < load_factor <= 1:
            raise ValueError("load_factor must be in the range (0, 1]")
        self._cap = initial_capacity
        self._load_factor = load_factor
        self._n = 0      # live entries
        self._used = 0   # live entries + tombstones (non-EMPTY slots)
        self._keys = [self._EMPTY] * self._cap
        self._vals = [None] * self._cap

    def _index(self, key):
        """Return the home slot of ``key`` for the current capacity."""
        mask = self._cap - 1
        # A power-of-two capacity lets a bit mask stand in for the modulo.
        if (self._cap & mask) == 0:
            return hash(key) & mask
        return hash(key) % self._cap

    def _probe(self, idx):
        """Yield each slot index once, starting at ``idx`` and wrapping around."""
        for _ in range(self._cap):
            yield idx
            idx = (idx + 1) % self._cap

    def _find_slot(self, key):
        """Locate ``key`` or the slot where it should be inserted.

        Returns:
            ``(idx, True)`` when ``key`` is stored at ``idx``; otherwise
            ``(idx, False)`` where ``idx`` is the first tombstone met on the
            probe path, else the terminating empty slot, or ``None`` if the
            whole table was scanned without finding either.
        """
        first_deleted = None
        for idx in self._probe(self._index(key)):
            k = self._keys[idx]
            if k is self._EMPTY:
                return (idx if first_deleted is None else first_deleted), False
            if k is self._DELETED:
                if first_deleted is None:
                    first_deleted = idx
            elif k == key:
                return idx, True
        return first_deleted, False

    def _rehash(self, new_cap):
        """Rebuild the table with ``new_cap`` slots, dropping all tombstones."""
        old_keys, old_vals = self._keys, self._vals
        self._cap = new_cap
        self._keys = [self._EMPTY] * new_cap
        self._vals = [None] * new_cap
        for k, v in zip(old_keys, old_vals):
            if k is self._EMPTY or k is self._DELETED:
                continue
            # Live keys are unique and the new table has no tombstones, so the
            # first empty slot on the probe path is the right home.
            for idx in self._probe(self._index(k)):
                if self._keys[idx] is self._EMPTY:
                    self._keys[idx] = k
                    self._vals[idx] = v
                    break
        self._used = self._n

    def _resize(self):
        """Double the capacity and reinsert every live entry."""
        self._rehash(self._cap * 2)

    def __setitem__(self, key, value):
        """Insert ``key`` or overwrite its value if it is already present."""
        idx, found = self._find_slot(key)
        if found:
            self._vals[idx] = value
            return
        limit = self._cap * self._load_factor
        if self._n + 1 > limit:
            # Too many live entries: grow.
            self._resize()
            idx, _ = self._find_slot(key)
        elif self._keys[idx] is self._EMPTY and self._used + 1 > limit:
            # Few live entries but tombstones are crowding the table:
            # purge them without growing.
            self._rehash(self._cap)
            idx, _ = self._find_slot(key)
        if self._keys[idx] is self._EMPTY:
            self._used += 1  # reusing a tombstone does not change occupancy
        self._keys[idx] = key
        self._vals[idx] = value
        self._n += 1

    def __getitem__(self, key):
        """Return the value stored for ``key``; raise ``KeyError`` if absent."""
        idx, found = self._find_slot(key)
        if not found:
            raise KeyError(key)
        return self._vals[idx]

    def __delitem__(self, key):
        """Remove ``key`` by leaving a tombstone; raise ``KeyError`` if absent."""
        idx, found = self._find_slot(key)
        if not found:
            raise KeyError(key)
        self._keys[idx] = self._DELETED
        self._vals[idx] = None
        self._n -= 1

    def __contains__(self, key):
        """Return ``True`` when ``key`` is stored in the map."""
        return self._find_slot(key)[1]

    def __len__(self):
        """Return the number of live key-value pairs."""
        return self._n

# Demo: insert three entries, read one back, test membership, delete one,
# then confirm the deleted key is no longer reachable.
hm = HashMapOpenAddressing()

hm["name"] = "Alice"
hm["age"] = 25
hm["city"] = "Mysore"

print(hm["name"])      # Alice
print("age" in hm)     # True

del hm["age"]
print(len(hm))         # 2

# Looking up the deleted key raises KeyError, which is reported instead.
try:
    print(hm["age"])
except KeyError:
    print("age not found")


Alice
True
2
age not found


# Problem 01 : Count frequency of each element in the array

In [6]:
# Solution 1: Use of two loops


def countfrequency(arr):
    """Print each distinct element of ``arr`` with its number of occurrences.

    Brute-force O(N^2) approach: for every element not yet counted, scan the
    rest of the array for duplicates and mark them as visited. One line
    ``<element> <count>`` is printed per distinct element, in order of first
    appearance.

    Args:
        arr: Sequence of elements comparable with ``==``.
    """
    # Take the length from the argument rather than from a module-level
    # variable, so the function works for any caller.
    n = len(arr)
    visited = [False] * n
    for i in range(n):
        if visited[i]:
            continue
        count = 1
        for j in range(i + 1, n):
            if arr[i] == arr[j]:
                visited[j] = True
                count += 1
        print(arr[i], count)

if __name__ == "__main__":
    # Sample run; prints "10 3", "5 2" and "15 1" on separate lines.
    arr = [10,5,10,15,10,5]
    n = len(arr)  # length of the sample array, kept as a module-level name
    countfrequency(arr)

10 3
5 2
15 1


In [7]:
# Solution 2: Using Map

def Frequency(arr, n):
    """Print how often each value occurs among the first ``n`` items of ``arr``.

    A dictionary keeps one running tally per distinct value; afterwards one
    line ``<value> <count>`` is printed per value, in order of first
    appearance.

    Args:
        arr: Indexable sequence of hashable values.
        n: Number of leading positions of ``arr`` to examine.
    """
    tally = {}
    for pos in range(n):
        item = arr[pos]
        tally[item] = tally.get(item, 0) + 1
    for item, occurrences in tally.items():
        print(item, occurrences)
        
if __name__ == "__main__":
    # Sample run; prints "10 3", "5 2" and "15 1" on separate lines.
    arr = [10,5,10,15,10,5]
    n = len(arr)  # Frequency takes the element count as its second argument
    Frequency(arr,n)

10 3
5 2
15 1


# Problem 02 : Find the highest/lowest frequency element

In [16]:
# solution 1 : Brute-Force approach(Using two loops): 

def high_low_Frequency(arr):
    """Print the most and the least frequent element of ``arr``.

    Brute-force O(N^2) approach: each not-yet-counted element is compared
    with everything after it, duplicates are marked as visited, and the
    running maximum and minimum counts are updated. Ties go to the element
    that appears first. For an empty ``arr`` both elements are reported as
    ``None``.

    Args:
        arr: Sequence of elements comparable with ``==``.
    """
    # Take the length from the argument rather than from a module-level
    # variable, so the function works for any caller.
    n = len(arr)
    visited = [False] * n

    max_freq = -1
    min_freq = n + 1  # larger than any possible count
    max_elem = None
    min_elem = None

    for i in range(n):
        if visited[i]:
            continue
        count = 1
        for j in range(i + 1, n):
            if arr[i] == arr[j]:
                visited[j] = True
                count += 1

        if count > max_freq:
            max_freq = count
            max_elem = arr[i]

        if count < min_freq:
            min_freq = count
            min_elem = arr[i]

    print(f"The Max Frequency element is {max_elem} and the min Frequency element is {min_elem}")
        
if __name__ == "__main__":
    # Sample run; reports 10 as the most frequent and 15 as the least
    # frequent element.
    arr = [10,5,10,15,10,5]
    n = len(arr)  # length of the sample array, kept as a module-level name
    high_low_Frequency(arr)
    
# Time Complexity: O(N*N)
# Space Complexity:  O(N) 

The Max Frequency element is 10 and the min Frequency element is 15


In [17]:
# Solution 2 : Optimized approach(Using map):

def frequency(arr):
    """Print the most and the least frequent element of ``arr``.

    Occurrences are tallied in a dictionary in one pass, then the keys with
    the largest and smallest tallies are selected. Ties go to the element
    that appears first. For an empty ``arr`` both elements are reported as
    ``None``.

    Args:
        arr: Iterable of hashable elements.
    """
    freq_map = {}
    for num in arr:
        freq_map[num] = freq_map.get(num, 0) + 1

    # max()/min() return the first key reaching the extreme count, and
    # default=None covers the empty input. This also handles inputs whose
    # elements are all equal, where the only count equals len(arr).
    max_ele = max(freq_map, key=freq_map.get, default=None)
    min_ele = min(freq_map, key=freq_map.get, default=None)

    print("The highest frequency element is:", max_ele)
    print("The lowest frequency element is:", min_ele)


if __name__ == "__main__":
    # Sample run; reports 10 as the highest-frequency and 15 as the
    # lowest-frequency element.
    arr = [10, 5, 10, 15, 10, 5]
    frequency(arr)


The highest frequency element is: 10
The lowest frequency element is: 15
