## **Code playground for SDA sem 10**


### Heap


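A heap can be implemented on top of a plain array: the node at index _i_ has its children at indices _2i + 1_ and _2i + 2_, and its parent at _(i - 1) // 2_.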


In [1]:
class MinHeap:
    """Binary min-heap stored in a flat list.

    The node at position ``i`` has its children at ``2 * i + 1`` and
    ``2 * i + 2`` and its parent at ``(i - 1) // 2``.  ``insert`` and ``pop``
    keep every parent no greater than its children, so the smallest value is
    always at ``h[0]``.
    """

    def __init__(self):
        # Backing array holding the heap in level order.
        self.h = []

    def get_left_index(self, index):
        """Return the position of the left child of ``index``."""
        return 2 * index + 1

    def get_right_index(self, index):
        """Return the position of the right child of ``index``."""
        return 2 * index + 2

    def get_parent_index(self, index):
        """Return the position of the parent of ``index``."""
        return (index - 1) // 2

    def top(self):
        """Return the smallest value without removing it.

        Returns ``None`` when the heap is empty, mirroring ``pop``.
        """
        if not self.h:
            return None
        return self.h[0]

    def size(self):
        """Return the number of stored values."""
        return len(self.h)

    def insert(self, val):
        """Add ``val`` and sift it up until its parent is not greater."""
        self.h.append(val)

        child = self.size() - 1
        parent = self.get_parent_index(child)

        while child > 0 and self.h[parent] > self.h[child]:
            self.h[parent], self.h[child] = self.h[child], self.h[parent]
            child = parent
            parent = self.get_parent_index(child)

    def pop(self):
        """Remove and return the smallest value, or ``None`` if empty."""
        if not self.h:
            return None

        # Detach the last leaf first; it becomes the new root unless it was
        # the only element, in which case it is the value to return.
        last = self.h.pop()
        if not self.h:
            return last

        val = self.h[0]
        self.h[0] = last
        self.heapify(0)

        return val

    def heapify(self, index):
        """Sift the value at ``index`` down until no child is smaller."""
        size = self.size()

        while True:
            l = self.get_left_index(index)
            r = self.get_right_index(index)

            min_index = index
            if l < size and self.h[l] < self.h[min_index]:
                min_index = l

            if r < size and self.h[r] < self.h[min_index]:
                min_index = r

            if min_index == index:
                return

            self.h[min_index], self.h[index] = self.h[index], self.h[min_index]
            index = min_index

In [2]:
my_heap = MinHeap()

arr = [7, 3, 4, 9, 6, 1]

# Insert the values one by one, showing the backing array after each insert.
for value in arr:
    my_heap.insert(value)
    print(my_heap.h)

# Drain the heap: show the backing array, then the minimum removed from it.
while my_heap.size() > 0:
    print(my_heap.h)
    print(my_heap.pop())

[7]
[3, 7]
[3, 7, 4]
[3, 7, 4, 9]
[3, 6, 4, 9, 7]
[1, 6, 3, 9, 7, 4]
[1, 6, 3, 9, 7, 4]
1
[3, 6, 4, 9, 7]
3
[4, 6, 7, 9]
4
[6, 9, 7]
6
[7, 9]
7
[9]
9


### **Heap sort**


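Converts the array into a heap and then repeatedly pops the root, storing each popped element in the slot freed at the end of the shrinking heap. Works _in-place_ - no auxiliary array, so _O(1)_ extra space (not counting the call stack) - in _O(N log N)_ time. Because a min-heap is used here, the result is in descending order.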


In [3]:
def heapify(arr, length, index):
    """Sift ``arr[index]`` down within the first ``length`` items of ``arr``.

    Only positions below ``length`` are treated as part of the heap.  The
    value moves down, swapping with its smaller child, until neither child
    inside the heap is smaller than it.
    """
    current = index

    while True:
        left = 2 * current + 1
        right = 2 * current + 2

        smallest = current
        if left < length and arr[left] < arr[smallest]:
            smallest = left

        if right < length and arr[right] < arr[smallest]:
            smallest = right

        # Heap property holds at this node, so nothing further down changes.
        if smallest == current:
            return

        arr[smallest], arr[current] = arr[current], arr[smallest]
        current = smallest


def heap_sort(arr):
    """Sort ``arr`` in place using a min-heap built inside the list itself.

    Each extracted minimum is parked at the back of the list, so the list
    ends up in descending order.  Nothing is returned.
    """
    size = len(arr)

    # Build the heap bottom-up, starting from the last node that can have a
    # child and finishing at the root.
    for root in range(size // 2 - 1, -1, -1):
        heapify(arr, size, root)

    # Pop by swapping the root (current minimum) with the last heap slot,
    # then restore the heap on the prefix that remains.
    for end in range(size - 1, -1, -1):
        arr[0], arr[end] = arr[end], arr[0]
        heapify(arr, end, 0)


# heap_sort rearranges the list it is given, so print the list afterwards.
arr = [7, 3, 4, 9, 6, 1]
heap_sort(arr)

# Minimums are moved to the back one by one, hence the descending order.
print(arr)  # [9, 7, 6, 4, 3, 1]

[9, 7, 6, 4, 3, 1]


### **heapq**


Allows _push_ and _pop_ operations in logarithmic time. Works directly on a plain _list_. An existing _list_ can be transformed into a _min-heap_ in linear time with the _heapq.heapify()_ function.


In [4]:
import heapq

# Start from an unordered list and turn it into a min-heap in place.
h = [7, 3, 4, 9, 6]
heapq.heapify(h)

# The smallest element of the heap is always at index 0.
print(h[0])  # 3
heapq.heappush(h, 5)

# Popping until the list is empty yields the values in ascending order.
while h:
    print(heapq.heappop(h))  # 3, 4, 5, 6, 7, 9

3
3
4
5
6
7
9


In [5]:
import heapq

h = []

# heapq is a min-heap, so push negated values to make the largest one come
# out first.
for value in (10, 3, 7):
    heapq.heappush(h, -value)

# Negate again on the way out to recover the original number.
print(-heapq.heappop(h))  # 10

10


In [6]:
import heapq

arr = [10, 3, 7]

# Build the negated copy first, then heapify it in one linear pass.
h = [-value for value in arr]
heapq.heapify(h)

# The smallest negated value corresponds to the largest original value.
largest = -heapq.heappop(h)
print(largest)  # 10

10


In [7]:
import heapq

h = []

# Tuples compare element by element, so the negated score in the first slot
# decides the order: the highest score is popped first.
for points, student in [(90, "Gosho"), (75, "Pesho"), (82, "Ivan")]:
    heapq.heappush(h, (-points, student))

while h:
    neg_score, name = heapq.heappop(h)
    score = -neg_score
    print(name, score)

Gosho 90
Ivan 82
Pesho 75


### **queue.PriorityQueue**


Allows _push_ and _pop_ operations in logarithmic time. It is thread-safe, so every operation carries extra synchronization overhead.


In [8]:
from queue import PriorityQueue

pq = PriorityQueue()
arr = [7, 3, 4, 9, 6]

for el in arr:
    pq.put(el)

# Peek at the smallest item without removing it.
# NOTE(review): this reads the `queue` attribute directly instead of going
# through put()/get() -- presumably fine in this single-threaded demo;
# confirm before copying into threaded code.
print(pq.queue[0])  # 3
pq.put(5)

# get() removes and returns the smallest remaining item each time.
while not pq.empty():
    print(pq.get())  # 3, 4, 5, 6, 7, 9

3
3
4
5
6
7
9


### Time comparison


Because of its thread-safety mechanisms, _queue.PriorityQueue_ is expected to perform **worse** than the plain _heapq_ implementation.


In [9]:
import random
import heapq
from queue import PriorityQueue


def pq_sort(size, max_element):
    """Sort ``size`` random integers by passing them through a PriorityQueue.

    The integers are drawn with ``random.randint(0, max_element)``; the
    returned list holds them in the order the queue hands them back.
    """
    queue = PriorityQueue()

    for _ in range(size):
        number = random.randint(0, max_element)
        queue.put(number)

    ordered = [queue.get() for _ in range(size)]
    return ordered


def heapq_sort(size, max_element):
    """Sort ``size`` random integers by pushing and popping them with heapq.

    The integers are drawn with ``random.randint(0, max_element)``; the
    returned list holds them in the order they are popped from the heap.
    """
    heap = []

    for _ in range(size):
        number = random.randint(0, max_element)
        heapq.heappush(heap, number)

    ordered = [heapq.heappop(heap) for _ in range(size)]
    return ordered

In [10]:
size, max_element = 100_000, 1_000_000

%timeit heapq_sort(size, max_element)
%timeit pq_sort(size, max_element)

218 ms ± 31.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
591 ms ± 75.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


Also note that _heapq_ can work _in-place_: _heapq.heapify()_ turns the starting list itself into a heap instead of building a new container, which will further improve performance.
