### Notes:

- Use heaps when all we care about is the **k-largest** or the **k-smallest** elements in a collection and we do not need to support fast-lookup, delete or search operations.
- The `heapq` module only provides min-heap functionality. Hence, use negative keys (for numbers) to emulate max-heap functionality.

In [2]:
from itertools import islice
import heapq
import math

class MinHeap():
    """
    Minimal min-heap object backed by a plain list managed through `heapq`.
    """

    def __init__(self):
        # List kept in heap order by the heapq functions below.
        self._heap = []

    def push(self, item):
        """Insert `item` into the heap."""
        heapq.heappush(self._heap, item)

    def pop(self):
        """Remove and return the smallest item."""
        smallest = heapq.heappop(self._heap)
        return smallest

    def pushpop(self, item):
        """Insert `item`, then remove and return the smallest item."""
        smallest = heapq.heappushpop(self._heap, item)
        return smallest

    def length(self):
        """Return the number of items currently stored."""
        return len(self._heap)

    def is_empty(self):
        """Return True when the heap holds no items."""
        return not self._heap

## 10.1 Merge Sorted Files

In [31]:
def merge_sorted(sorted_arrays):
    """
    Merge several individually sorted sequences into a single sorted list.

    The heap holds one (value, source index) entry per input that still has
    elements, so it never grows beyond the number of inputs.

    :param sorted_arrays: iterable of iterables, each sorted in ascending order.
    :return: list containing every element of every input, in ascending order.
    """
    # Private marker for "iterator exhausted". Unlike None it can never be
    # equal to a real element, so None values in the input are not dropped.
    exhausted = object()
    iterators = [iter(array) for array in sorted_arrays]

    # Seed the heap with the head of every non-empty input. The source index
    # both identifies the iterator to advance and breaks ties between equal values.
    heap = []
    for index, iterator in enumerate(iterators):
        head = next(iterator, exhausted)
        if head is not exhausted:
            heap.append((head, index))
    heapq.heapify(heap)

    merged = []
    while heap:
        value, index = heap[0]
        merged.append(value)
        following = next(iterators[index], exhausted)
        if following is exhausted:
            # This input is finished: drop its entry.
            heapq.heappop(heap)
        else:
            # Swap the root for the next element of the same input in one sift.
            heapq.heapreplace(heap, (following, index))
    return merged


def merge_sorted_2(sorted_arrays):
    """
    Merge the given sorted arrays by delegating to `heapq.merge`.

    Returns the merged elements as a list.
    """
    merged_stream = heapq.merge(*sorted_arrays)
    return [*merged_stream]
        
# Tests
# Both implementations are checked against the same expected merge; the values
# 0 and 6 occur in two inputs each and must both be kept in the output.
assert merge_sorted([[3, 5, 7], [0, 6], [0, 6, 28]]) == [0, 0, 3, 5, 6, 6, 7, 28]
assert merge_sorted_2([[3, 5, 7], [0, 6], [0, 6, 28]]) == [0, 0, 3, 5, 6, 6, 7, 28]

Space Complexity = `O(k)`

Time Complexity = `O(nlogk)`, where `n` is the total number of elements across all sequences and `k` is the number of sequences.

## 10.3 Sort an almost sorted array

In [4]:
def sort_almost_sorted(sequence, k):
    """
    Sort a sequence in which every element is at most `k` positions away
    from its position in sorted order.

    A min-heap of at most `k + 1` items is enough: once `k + 1` items have
    been seen, the smallest of them must be the next output element.

    :param sequence: any iterable (list, iterator, generator, ...).
    :param k: maximum displacement of an element; `islice` raises ValueError
        if it is negative.
    :return: list with the elements of `sequence` in ascending order.
    """
    # Use one shared iterator for both passes. Iterating a re-iterable input
    # (e.g. a list) twice would feed the first k items into the heap again.
    stream = iter(sequence)

    # Prime the heap with the first k items.
    window = list(islice(stream, k))
    heapq.heapify(window)

    # Each further item pushes the window to k + 1 entries and releases its minimum.
    result = []
    for item in stream:
        result.append(heapq.heappushpop(window, item))

    # Whatever is left in the window comes out in ascending order.
    while window:
        result.append(heapq.heappop(window))

    return result

# Tests
# In this input every element sits at most 2 positions from its sorted position.
assert sort_almost_sorted(iter([3, -1, 2, 6, 4, 5, 8]), 2) == [-1, 2, 3, 4, 5, 6, 8]

Space Complexity = `O(k)`

Time Complexity = `O(nlogk)`

## 10.4 Compute the k-closest stars

In [6]:
# Helper
def euclidean_dist(p1, p2):
    """
    Return the Euclidean distance between points `p1` and `p2`.

    Coordinates are paired with `zip`, so extra coordinates of the longer
    point are ignored.
    """
    return math.sqrt(sum((a - b) ** 2 for a, b in zip(p1, p2)))

# Main
def find_closest_k_stars(stars, k):
    """
    Return the k stars nearest to earth, which is placed at the origin (0, 0, 0).

    The result is listed in the heap's internal order, not sorted by distance.
    """
    origin = (0, 0, 0)
    # Entries are keyed on the negated distance, so the heap root is always the
    # farthest star currently retained.
    farthest_first = []
    for candidate in stars:
        entry = (-euclidean_dist(origin, candidate), candidate)
        heapq.heappush(farthest_first, entry)
        # One entry too many: discard the farthest star to get back to k.
        if len(farthest_first) == k + 1:
            heapq.heappop(farthest_first)
    return [star for _, star in farthest_first]

# Tests
# With k = 1 only the star nearest to the origin should be returned.
assert find_closest_k_stars([(1, 2, 2), (100, 100, 2)], 1) == [(1, 2, 2)]

Space Complexity = `O(k)`

Time Complexity = `O(nlogk)`

In [7]:
# Variant: Design an algorithm that reads a sequence of `n` elements and for each element, starting from the
# `kth` element, prints the `kth` largest element read up to that point. The length of the sequence is not known 
# in advance. Time and space complexities should be `O(nlogk)` and `O(k)` respectively. What are the worst-case inputs to your
# algorithm?

def kth_largest_so_far(sequence, n, k):
    """
    Read at most `n` elements of `sequence` and, from the k-th element onwards,
    record the k-th largest element seen up to that point.

    Returns the recorded values as a list (empty if fewer than `k` elements are read).
    """
    # Min-heap of the k largest values seen so far; its root is the k-th largest.
    top_k, kth_values = [], []
    for position, value in enumerate(islice(sequence, n)):
        if position >= k:
            # Only a value larger than the current k-th largest changes the heap.
            if value > top_k[0]:
                heapq.heapreplace(top_k, value)
            kth_values.append(top_k[0])
            continue

        # Still collecting the first k values.
        heapq.heappush(top_k, value)
        if position == k - 1:
            kth_values.append(top_k[0])
    return kth_values
        

# Tests
assert kth_largest_so_far(iter([1, 2, 3, 4, 5]), 5, 3) == [1, 2, 3]  # Worst case input: Sorted in ascending order
assert kth_largest_so_far(iter([5, 4, 3, 2, 1]), 5, 3) == [3, 3, 3]

## 10.5  Compute the median of online data

In [11]:
def running_median(seq):
    """
    Generate the median of the elements read so far, once per element of `seq`.

    With an odd count the middle element itself is yielded; with an even
    count the two middle elements are averaged.
    """
    # lower: max-heap of the smaller half (values stored negated)
    # upper: min-heap of the larger half; it holds the extra element on odd counts
    lower, upper = [], []
    for value in seq:
        # Route the new value through the upper half, handing its smallest
        # element down to the lower half.
        handed_down = heapq.heappushpop(upper, value)
        heapq.heappush(lower, -handed_down)

        # Rebalance so the lower half never outgrows the upper half.
        if len(lower) > len(upper):
            heapq.heappush(upper, -heapq.heappop(lower))

        if len(lower) != len(upper):
            yield upper[0]
        else:
            lower_max = -lower[0]
            yield (lower_max + upper[0]) * 0.5

# Tests
seq = [1, 0, 3, 5, 2, 0, 1]
medians = list(running_median(iter(seq)))
assert medians == [1, 0.5, 1, 2, 2, 1.5, 1]

Time Complexity: `O(n log n)` to compute all medians

Space Complexity: `O(n)`