1. Download the following text file:

**Median.txt**

The goal of this problem is to implement the "Median Maintenance" algorithm (covered in the Week 3 lecture on heap applications).  The text file contains a list of the integers from 1 to 10000 in unsorted order; you should treat this as a stream of numbers, arriving one by one.  Letting $x_i$ denote the $i$th number of the file, the $k$th median $m_k$ is defined as the median of the numbers $x_1,\ldots,x_k$. (So, if $k$ is odd, then $m_k$ is the $((k+1)/2)$th smallest number among $x_1,\ldots,x_k$; if $k$ is even, then $m_k$ is the $(k/2)$th smallest number among $x_1,\ldots,x_k$.)


In the box below you should type the sum of these 10000 medians, modulo 10000 (i.e., only the last 4 digits).  That is, you should compute $(m_1+m_2+m_3 + \cdots + m_{10000}) \bmod 10000$. 

OPTIONAL EXERCISE: Compare the performance achieved by heap-based and search-tree-based implementations of the algorithm.


In [3]:
import heapq


class Heap(object):
    """Min-heap wrapper around ``heapq`` that orders items by ``key(item)``.

    Entries are stored internally as ``(key(item), item)`` tuples, so the
    item with the smallest key is always at the root. Passing a negating
    key (e.g. ``lambda x: -x``) therefore yields max-heap behaviour.
    """

    def __init__(self, initial=None, key=lambda x: x):
        """Create the heap.

        Args:
            initial: Optional iterable of items to load; a falsy value
                gives an empty heap.
            key: Callable mapping an item to the value it is ordered by.
        """
        self.key = key
        entries = [(key(element), element) for element in initial] if initial else []
        # heapify is a no-op on an empty list, so it is safe to call always.
        heapq.heapify(entries)
        self._data = entries

    def push(self, item):
        """Insert ``item`` into the heap."""
        entry = (self.key(item), item)
        heapq.heappush(self._data, entry)

    def pop(self):
        """Remove and return the item with the smallest key."""
        _, item = heapq.heappop(self._data)
        return item

    def peek(self):
        """Return the item with the smallest key without removing it."""
        _, item = self._data[0]
        return item

    def __len__(self):
        """Return the number of stored items."""
        return len(self._data)


class MedianMaintainer:
    """Sum the running medians of a stream of integers using two heaps.

    The smaller half of the numbers seen so far is kept in a max-heap and
    the larger half in a min-heap, with sizes differing by at most one.
    For ``k`` numbers seen, the median is the ``((k + 1) // 2)``-th
    smallest, i.e. the lower of the two middle values when ``k`` is even.

    Args:
        input_file: Optional path to a text file with one integer per line.
        input_array: Optional iterable of values convertible with ``int``.
            Only consulted when ``input_file`` is ``None``.
    """

    def __init__(self, input_file=None, input_array=None):
        self.input_file = input_file
        self.input_array = input_array
        self._reset()

    def _reset(self):
        """Forget every number seen so far."""
        # Max-heap of the smaller half; values are stored negated because
        # heapq only provides a min-heap.
        self._heap_low = []
        # Min-heap of the larger half.
        self._heap_high = []
        self._median_sum = 0

    def sum_medians(self, modulus=10000):
        """Return the sum of all running medians, reduced modulo ``modulus``.

        The input is re-read from scratch on every call, so repeated calls
        give the same answer instead of accumulating. With no input (or an
        empty one) the result is 0.

        Args:
            modulus: Value the sum is reduced by; defaults to 10000, i.e.
                the last four digits of the sum.

        Returns:
            ``(m_1 + ... + m_n) % modulus`` for the ``n`` numbers read.
        """
        self._reset()
        if self.input_file is not None:
            with open(self.input_file) as handle:
                for line in handle:
                    # Tolerate blank lines (e.g. a trailing empty line).
                    if line.strip():
                        self._add_number(int(line))
        elif self.input_array is not None:
            for number in self.input_array:
                self._add_number(int(number))
        return self._median_sum % modulus

    def _add_number(self, num):
        """Insert ``num`` and add the resulting median to the running sum."""
        low, high = self._heap_low, self._heap_high
        # -low[0] is the largest value of the smaller half.
        if not low or num <= -low[0]:
            heapq.heappush(low, -num)
        else:
            heapq.heappush(high, num)
        # Rebalance so the two halves never differ in size by more than one.
        if len(low) - len(high) > 1:
            heapq.heappush(high, -heapq.heappop(low))
        elif len(high) - len(low) > 1:
            heapq.heappush(low, -heapq.heappop(high))
        # On a tie the lower middle value is the median, which sits on top
        # of the low heap; otherwise it is on top of the larger heap.
        median = -low[0] if len(low) >= len(high) else high[0]
        self._median_sum += median

if __name__ == "__main__":
    # Feed the assignment's input file through the maintainer and report
    # the value returned by sum_medians().
    median_maintainer = MedianMaintainer(
        input_file="Median.txt",
    )
    median_sum = median_maintainer.sum_medians()
    print("Resp:", median_sum)

#Resp: 1213

Resp: 1213
