<a href="https://colab.research.google.com/github/Thrishankkuntimaddi/Data-Structures-and-Algorithms-Advanced/blob/main/15%20-%20Heap.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Sort a K-Sorted Array

I/P : arr = [9, 8, 7, 18, 19, 17] ; k = 2

O/P : [7, 8, 9, 17, 18, 19]

In [1]:
# Implementation

import heapq

def sortk(arr, k):
    """Sort a k-sorted list in place with a sliding min-heap.

    A k-sorted list is one where every element sits at most ``k`` positions
    away from its final sorted position, so the next output value is always
    among the next ``k + 1`` unread elements.

    Args:
        arr: List to sort; it is overwritten with the sorted order.
        k: Maximum displacement of any element.

    Returns:
        None. The result is written back into ``arr``.
    """
    n = len(arr)
    heap = arr[:k + 1]
    heapq.heapify(heap)

    # Slide the window: emit the current minimum, then admit the next element.
    drained = 0
    for drained, read_pos in enumerate(range(k + 1, n), start=1):
        arr[drained - 1] = heapq.heappop(heap)
        heapq.heappush(heap, arr[read_pos])

    # Whatever is still in the heap fills the tail in ascending order.
    for offset in range(len(heap)):
        arr[drained + offset] = heapq.heappop(heap)

def printArray(arr):
    """Print the items of ``arr`` on one line, each followed by a space."""
    line = "".join(str(item) + " " for item in arr)
    print(line)

# Example usage: sortk() rewrites `arr` in place and returns None,
# so the list itself is printed after the call.
arr = [6, 5, 3, 2, 8, 10, 9]
k = 3
sortk(arr, k)
print("Sorted array:")
printArray(arr)

# Time Complexity : O(n logk)  -> O(k) heapify + (n - k) pop/push pairs + k final pops, each O(logk)
# Space Complexity : O(k + 1)

Sorted array:
2 3 5 6 8 9 10 


# Purchase Maximum Items

I/P : cost = [1, 12, 5, 111, 200] ; sum = 10

O/P : 2

In [3]:
# Naive Solution

def PMI(cost, sum):
    """Print how many items can be bought, cheapest first, within a budget.

    Args:
        cost: List of item prices; it is sorted in place.
        sum: Available budget (the parameter name shadows the builtin only
            inside this function).

    Returns:
        None. The count is printed.
    """
    cost.sort()

    bought = 0
    # Walk the ascending prices until one no longer fits in what is left.
    while bought < len(cost) and cost[bought] <= sum:
        sum -= cost[bought]
        bought += 1

    print(bought)

# Demo for PMI(). The budget is deliberately not stored in a global called
# `sum`: that would replace the builtin sum() for every later notebook cell.
cost = [1, 12, 5, 111, 200]
budget = 10
PMI(cost, budget)

# Time Complexity : O(n logn)
# Space Complexity : O(1)

2


In [4]:
# Efficient Solution

def maxElement(cost, sum):
    """Return how many items can be bought, cheapest first, within a budget.

    A min-heap yields prices in ascending order without fully sorting them,
    so only as many pops happen as there are purchases.

    Args:
        cost: Iterable of item prices; it is not modified.
        sum: Available budget (the parameter name shadows the builtin only
            inside this function).

    Returns:
        The maximum number of items whose total price fits in the budget.
    """
    # Heapify a copy: popping from the caller's list while iterating over it
    # ended the loop early and under-counted the purchases.
    pq = list(cost)
    heapq.heapify(pq)

    res = 0
    while pq and pq[0] <= sum:
        sum -= heapq.heappop(pq)
        res += 1

    return res


# The budget is not stored in a global called `sum`, which would replace the
# builtin sum() for every later notebook cell.
cost = [1, 12, 5, 111, 200]
budget = 10
print(maxElement(cost, budget))

# Time Complexity : O(n) + O(res * logn)
# Space Complexity : O(n) for the heap copy (the input list is left untouched)

2


# K Largest Elements

I/P : arr = [5, 15, 10, 20, 8] ; k = 2

O/P : 15 20


      1. Build a minHeap of first k items

      2. Traverse from (k+1)th element

          a. Compare the current element with the top of the heap; if it is smaller than the top, ignore it

          b. else remove the top element and insert the current element in the minHeap

      3. print contents of minHeap

In [7]:
# Implementation

import heapq

def k_largest_elements(arr, k):
    """Return the k largest values of ``arr`` using a size-k min-heap.

    Args:
        arr: List of comparable values; it is not modified.
        k: Number of values to keep.

    Returns:
        A list holding the k largest values in heap order (smallest of them
        first), or an empty list when ``k`` is not in ``1..len(arr)``.
    """
    if not 0 < k <= len(arr):
        return []

    # The heap root is the weakest of the current top-k candidates.
    heap = arr[:k]
    heapq.heapify(heap)

    for candidate in arr[k:]:
        if candidate > heap[0]:
            # Evict the weakest candidate and admit the stronger one.
            heapq.heappop(heap)
            heapq.heappush(heap, candidate)

    return heap

def printArray(arr):
    """Print the items of ``arr`` on one line, each followed by a space."""
    pieces = [str(item) + " " for item in arr]
    print("".join(pieces))

# Demo: the returned list is the min-heap itself, so the smallest of the
# k kept values is printed first.
arr = [5, 15, 10, 20, 8]
k = 2
result = k_largest_elements(arr, k)
printArray(result)


# Time Complexity : O(k + (n-k) * logk)

15 20 


In [9]:
# Comparison with MaxHeap based solution

import heapq

def k_largest_elements_max_heap(arr, k):
    """Return the k largest values of ``arr`` in descending order.

    heapq only provides a min-heap, so every value is negated on the way in
    and negated again on the way out to simulate a max-heap.

    Args:
        arr: Iterable of numbers; it is not modified.
        k: Number of values to extract.

    Returns:
        The k largest values, largest first, or an empty list when ``k`` is
        not in ``1..len(arr)``.
    """
    if not 0 < k <= len(arr):
        return []

    negated = [-value for value in arr]
    heapq.heapify(negated)

    return [-heapq.heappop(negated) for _ in range(k)]

# Example usage: values are popped from the simulated max-heap,
# so the result is printed largest first.
arr = [5, 15, 10, 20, 8]
k = 2
result = k_largest_elements_max_heap(arr, k)
printArray(result)

# Time Complexity : O(n + k logn)  -> O(n) heapify + k pops of O(logn) each

20 15 


# K Closest Elements

I/P : arr = [10, 15, 7, 3, 4] ; x = 8 ; k = 2

O/P : 7 10

In [13]:
# Naive Solution

def kClosest(arr, k, x):
    """Print the k values of ``arr`` closest to ``x``, nearest first.

    Naive O(n * k) approach: scan for the closest remaining value k times.
    Ties are resolved in favour of the value that appears first.

    Args:
        arr: Iterable of numbers; it is not modified.
        k: How many values to print. Values larger than ``len(arr)`` print
            every element; non-positive values print nothing.
        x: Target value.

    Returns:
        None. Values are printed on one line, each followed by a space.
    """
    # Work on a copy so the caller's list is not consumed by the pops below.
    remaining = list(arr)

    # Clamp k: asking for more values than exist used to raise IndexError.
    for _ in range(min(k, len(remaining))):
        # min() keeps the first index among equal distances.
        mi = min(range(len(remaining)), key=lambda j: abs(remaining[j] - x))
        print(remaining.pop(mi), end=" ")

# Example usage: print the k = 3 values of arr closest to x = 12.
arr = [5, 15, 10, 20, 8]
k = 3
x = 12
kClosest(arr, k, x)

# Time Complexity : O(nk)

10 15 8 

In [18]:
# Efficient Solution

# This problem is a variation of k smallest elements problem

import heapq

def kcloses(arr, x, k):
    """Print the k values of ``arr`` closest to ``x`` using a size-k heap.

    The heap stores ``(-distance, index)`` pairs, so its root is always the
    farthest of the current k candidates and can be evicted in O(log k).

    Args:
        arr: Sequence of numbers; it is not modified.
        x: Target value.
        k: How many values to print. Values larger than ``len(arr)`` print
            every element; non-positive values print nothing.

    Returns:
        None. Values are printed farthest first on one line, each followed
        by a space.
    """
    # Clamp k: k > len(arr) indexed past the end and k == 0 read h[0] on an
    # empty heap; both raised IndexError.
    k = min(k, len(arr))
    if k <= 0:
        return

    h = [(-abs(arr[i] - x), i) for i in range(k)]
    heapq.heapify(h)

    for i in range(k, len(arr)):
        curr = -abs(arr[i] - x)
        # Strictly closer than the farthest kept candidate: swap it in.
        if curr > h[0][0]:
            heapq.heapreplace(h, (curr, i))

    while h:
        _, i = heapq.heappop(h)
        print(arr[i], end=" ")

# Demo: note the argument order is (arr, x, k); the heap is drained from its
# root, so the farthest of the k closest values is printed first.
arr = [5, 15, 10, 20, 8]
k = 3
x = 12
kcloses(arr, x, k)

# Time Complexity : O(n logk)

8 15 10 

# Merge K Sorted Arrays

I/P : arr =

        [[10, 20, 30],
         [5, 15],
         [1, 9, 11, 18]]

O/P : res = [1, 5, 9, 10, 11, 15, 18, 20, 30]


In [19]:
# Super Naive Solution
'''
1. put all elements in res()
2. sort res()

# Time Complexity : O(nk lognk)
n -> maximum no.of elements in an array
k -> no.of input arrays
'''

def mergeKSortedArrays(arrays):
    """Merge several arrays by flattening them and sorting the result.

    Args:
        arrays: Iterable of iterables of mutually comparable values.

    Returns:
        A new sorted list containing every value from every input array.
    """
    return sorted(value for array in arrays for value in array)

# Demo input: three individually sorted arrays of different lengths.
arrays = [
    [10, 20, 30],
    [5, 15],
    [1, 9, 11, 18]
]

merged_result = mergeKSortedArrays(arrays)
print("Merged array:", merged_result)

# Time Complexity : O(nk lognk)

Merged array: [1, 5, 9, 10, 11, 15, 18, 20, 30]


In [20]:
# Naive Solution

'''
1. Copy first array to res[]
2. Do following for remaining arrays starting from the second array. merge current array into res[]
'''

def mergeKSortedArrays(arrays):
    """Merge sorted arrays by folding them pairwise into one result.

    Args:
        arrays: Sequence of sorted sequences; none of them is modified.

    Returns:
        A new sorted list with every value from every input array, or an
        empty list when ``arrays`` is empty.
    """
    # No input arrays: there is no first array to seed the result with.
    if not arrays:
        return []

    # list() copies like .copy() did, but also accepts tuples and other
    # sequences as the first array.
    res = list(arrays[0])

    for i in range(1, len(arrays)):
        res = mergeTwoSortedArrays(res, arrays[i])

    return res

def mergeTwoSortedArrays(arr1, arr2):
    """Merge two sorted sequences into a new sorted list.

    When the two front values compare equal, the one from ``arr2`` is
    emitted first.

    Args:
        arr1: First sorted sequence.
        arr2: Second sorted sequence.

    Returns:
        A new list with all values of both inputs in ascending order.
    """
    size1, size2 = len(arr1), len(arr2)
    merged = []
    p1 = p2 = 0

    # Exactly one value is emitted per step until both inputs are consumed.
    for _ in range(size1 + size2):
        take_first = p2 == size2 or (p1 < size1 and arr1[p1] < arr2[p2])
        if take_first:
            merged.append(arr1[p1])
            p1 += 1
        else:
            merged.append(arr2[p2])
            p2 += 1

    return merged

# Demo input: three individually sorted arrays of different lengths.
arrays = [
    [10, 20, 30],
    [5, 15],
    [1, 9, 11, 18]
]

merged_result = mergeKSortedArrays(arrays)
print("Merged array:", merged_result)

# Time Complexity : O(n * k^2)

Merged array: [1, 5, 9, 10, 11, 15, 18, 20, 30]


In [23]:
# Efficient Solution

import heapq

def mergeK(arrays):
    """Merge sorted arrays using a min-heap with one entry per array.

    Each heap entry is ``(value, array_index, value_index)``: the value
    orders the heap and the two indices locate its successor.

    Args:
        arrays: Sequence of sorted sequences; empty ones are skipped.

    Returns:
        A new sorted list containing every value from every input array.
    """
    # Seed the heap with the first value of every non-empty array.
    frontier = [
        (array[0], which, 0) for which, array in enumerate(arrays) if array
    ]
    heapq.heapify(frontier)

    merged = []
    while frontier:
        value, which, pos = heapq.heappop(frontier)
        merged.append(value)

        # Replace the emitted value with its successor from the same array.
        pos += 1
        source = arrays[which]
        if pos < len(source):
            heapq.heappush(frontier, (source[pos], which, pos))

    return merged

# Demo input: three individually sorted arrays of different lengths.
arrays = [
    [10, 20, 30],
    [5, 15],
    [1, 9, 11, 18]
]
merged_result = mergeK(arrays)
print("Merged array:", merged_result)

# Time Complexity : O(nk logk)  -> up to nk elements in total, each pushed/popped on a heap of size <= k

Merged array: [1, 5, 9, 10, 11, 15, 18, 20, 30]


# Median of a Stream

I/P : arr = [25, 7, 10, 15, 20]

O/P : 25, 16, 10, 12.5, 15


        sequence       median

           {20}          20
          {20, 10}       15
        {20, 10, 30}     20
      {20, 10, 30, 7}    15  

In [24]:
# Naive Solution

def findMedian(arr):
    """Return the running median after each element of ``arr`` is read.

    Naive approach: keep every value seen so far in a list, re-sort it after
    each arrival, and read the middle.

    Args:
        arr: Iterable of numbers.

    Returns:
        A list with one median per input element. With an odd count it is
        the middle value itself; with an even count it is the float average
        of the two middle values.
    """
    seen = []
    medians = []

    for count, number in enumerate(arr, start=1):
        seen.append(number)
        seen.sort()

        mid, is_odd = divmod(count, 2)
        if is_odd:
            medians.append(seen[mid])
        else:
            medians.append((seen[mid] + seen[mid - 1]) / 2)

    return medians

# Demo: one median is reported per element of the stream.
arr = [25, 7, 10, 15, 20]
medians_result = findMedian(arr)
print("Medians:", medians_result)

# Time Complexity : O(n^2)

Medians: [25, 16.0, 10, 12.5, 15]


In [25]:
# Better Solution (Augmented BST)
'''
-> Create a BST
-> Add all elements one by one
-> Use the subtree sizes stored in each node to find the middle element(s) and compute the median
'''

class Node:
    """BST node that also records the size of the subtree rooted at it."""

    def __init__(self, key):
        self.left = None
        self.right = None
        self.key = key
        self.count = 1  # nodes in this subtree, including this node


class AugmentedBST:
    """Unbalanced BST with subtree sizes, used to answer median queries.

    The tree is not rebalanced, so every operation costs O(height).
    """

    def __init__(self):
        self.root = None

    def insert(self, root, key):
        """Insert ``key`` into the subtree at ``root`` and return its root.

        The caller must store the returned node, e.g.
        ``bst.root = bst.insert(bst.root, key)``.

        Args:
            root: Root of the subtree to insert into, or None when empty.
            key: Value to insert; duplicates are allowed.

        Returns:
            The root of the updated subtree.
        """
        if root is None:
            return Node(key)

        if key < root.key:
            root.left = self.insert(root.left, key)
        else:
            # Duplicates go right. Skipping them while still incrementing
            # count made the sizes exceed the real node count and broke
            # _findKth.
            root.right = self.insert(root.right, key)

        root.count += 1
        return root

    def findMedian(self):
        """Return the median of all inserted keys.

        Returns:
            None for an empty tree, the middle key for an odd number of
            keys, or the float average of the two middle keys otherwise.
        """
        if self.root is None:
            return None

        size = self.root.count
        if size % 2 == 1:
            return self._findKth(self.root, size // 2 + 1)

        left_median = self._findKth(self.root, size // 2)
        right_median = self._findKth(self.root, size // 2 + 1)
        return (left_median + right_median) / 2

    def _findKth(self, node, k):
        """Return the k-th smallest key (1-based) under ``node``, else None."""
        if node is None:
            return None

        left_size = node.left.count if node.left else 0

        if left_size + 1 == k:
            return node.key
        if k <= left_size:
            return self._findKth(node.left, k)
        # Skip the left subtree and this node when descending right.
        return self._findKth(node.right, k - left_size - 1)

# Demo: feed the stream into the tree and record the median after each insert.
augmented_bst = AugmentedBST()
elements = [25, 7, 10, 15, 20]
medians = []

for elem in elements:
    # insert() returns the root of the updated subtree, so it is stored back.
    augmented_bst.root = augmented_bst.insert(augmented_bst.root, elem)
    median = augmented_bst.findMedian()
    medians.append(median)

print("Medians:", medians)


Medians: [25, 16.0, 10, 12.5, 15]


In [29]:
# Efficient Solution
'''
s : maxHeap containing smaller half
g : minHeap containing greater half

Do the following for every item x:

    a) s.push(x)
    b) g.push(s.pop())
    c) if size(g) > size(s)
          s.push(g.pop())
    d) if size(s) > size(g)
          print(s.top())
       else:
          print((s.top() + g.top())/2)

-> MaxHeap s is implemented using minHeap and negative of every element
'''

import heapq

def streamMed(arr):
    """Print the running median after each element of ``arr``.

    Two heaps split the values seen so far: ``smaller`` is a max-heap
    (stored negated) for the lower half and ``greater`` is a min-heap for
    the upper half. ``smaller`` holds the extra value when the count is odd.

    Args:
        arr: Sequence of numbers.

    Returns:
        None. Medians are printed on one line, each followed by a space.
    """
    smaller, greater = [], []

    for value in arr:
        # Route the value through the lower half so that the largest of the
        # lower half moves up into the upper half.
        promoted = -heapq.heappushpop(smaller, -value)
        heapq.heappush(greater, promoted)

        # Rebalance so the lower half is never the shorter one.
        if len(greater) > len(smaller):
            heapq.heappush(smaller, -heapq.heappop(greater))

        if len(smaller) > len(greater):
            print(-smaller[0], end = " ")
        else:
            print((greater[0] - smaller[0])/2, end = " ")

# Demo: streamMed() prints one median per element instead of returning them.
elements = [25, 7, 10, 15, 20]
streamMed(elements)

25 16.0 10 12.5 15 