In [46]:
## problem: given numbers x1...xn arriving one by one, report at each time step i the median of {x1...xi}
## use O(log(i)) time at each step i
## solution: maintain two heaps. Hlow: supports extract-max (max heap); Hhigh: supports extract-min (min heap)
## when item i comes in, compare it to the max of Hlow and the min of Hhigh to decide which side it goes to, then move elements across to keep the sizes of the two heaps balanced
## heap operations needed: a. maintain the heap property (min or max), b. read the size of the current heap, c. insert an item, pop from the top

In [250]:
class HeapMin:
    """Binary min-heap stored in a flat Python list.

    Attributes:
        heap: backing list; slot 0 holds the smallest stored item and the
            children of slot ``i`` sit at ``2*i + 1`` and ``2*i + 2``.
        size: item counter adjusted by ``heappush`` (+1) and ``heappop`` (-1).
    """

    def __init__(self):
        self.heap = []
        self.size = 0

    @staticmethod
    def swap(a, b):
        """Return the two arguments in reversed order, as ``(b, a)``."""
        return b, a

    @staticmethod
    def find_p_index(index):
        """Return the slot of the parent of ``index``."""
        # (index - 1) // 2 and (index - 2) // 2 agree whenever index is even,
        # so a single expression covers both parities.
        return (index - 1) // 2

    @staticmethod
    def find_ch_index(index):
        """Return ``(left_child_slot, right_child_slot)`` for ``index``."""
        left = 2 * index + 1
        return left, left + 1

    def valid_position(self, index, p_index):
        """Return False when the item at ``p_index`` is greater than the one at ``index``."""
        child_value = self.heap[index]
        parent_value = self.heap[p_index]
        return not parent_value > child_value

    def _shiftup(self, item_index):
        """Move the item at ``item_index`` towards the root until its parent is not greater."""
        pos = item_index
        while pos:
            parent = self.find_p_index(pos)
            if self.valid_position(pos, parent):
                break
            self.heap[parent], self.heap[pos] = self.heap[pos], self.heap[parent]
            pos = parent

    def _shiftdown(self, item_index):
        """Move the item at ``item_index`` towards the leaves until no child is smaller."""
        last = len(self.heap) - 1
        pos = item_index
        while True:
            left, right = self.find_ch_index(pos)
            if left > last:
                return
            # With two children take the strictly smaller one; on a tie the
            # right child is chosen.
            if right > last or self.heap[left] < self.heap[right]:
                candidate = left
            else:
                candidate = right
            if self.valid_position(candidate, pos):
                return
            self.heap[pos], self.heap[candidate] = self.heap[candidate], self.heap[pos]
            pos = candidate

    def _take_root(self):
        """Remove and return the root, refilling it from the last slot (``size`` untouched)."""
        root = self.heap[0]  # IndexError on an empty heap, before anything is modified
        tail = self.heap.pop()
        if self.heap:
            self.heap[0] = tail
            self._shiftdown(0)
        return root

    def heappop(self):
        """Remove and return the smallest item; raises IndexError when the heap is empty."""
        smallest = self._take_root()
        self.size -= 1
        return smallest

    def heappush(self, item):
        """Insert ``item`` at the bottom of the heap and sift it up into place."""
        self.heap.append(item)
        self._shiftup(len(self.heap) - 1)
        self.size += 1

    def heappushpop(self, item):
        """Insert ``item``, then remove and return the smallest item; ``size`` is left as is."""
        self.heap.append(item)
        self._shiftup(len(self.heap) - 1)
        return self._take_root()

    def heapreplace(self, item):
        """Remove the current smallest item, insert ``item``, and return the removed one.

        Raises IndexError when the heap is empty; ``size`` is left as is.
        """
        smallest = self._take_root()
        self.heap.append(item)
        self._shiftup(len(self.heap) - 1)
        return smallest
    
        


In [311]:
class HeapMax:
    """Binary max-heap stored in a flat Python list.

    Attributes:
        heap: backing list; slot 0 holds the largest stored item and the
            children of slot ``i`` sit at ``2*i + 1`` and ``2*i + 2``.
        size: item counter adjusted by ``heappush`` (+1) and ``heappop`` (-1).
    """

    def __init__(self):
        self.heap = []
        self.size = 0

    @staticmethod
    def swap(a, b):
        """Return the two arguments in reversed order, as ``(b, a)``."""
        return b, a

    @staticmethod
    def find_p_index(index):
        """Return the slot of the parent of ``index``."""
        # (index - 1) // 2 and (index - 2) // 2 agree whenever index is even,
        # so a single expression covers both parities.
        return (index - 1) // 2

    @staticmethod
    def find_ch_index(index):
        """Return ``(left_child_slot, right_child_slot)`` for ``index``."""
        left = 2 * index + 1
        return left, left + 1

    def valid_position(self, index, p_index):
        """Return False when the item at ``p_index`` is smaller than the one at ``index``."""
        child_value = self.heap[index]
        parent_value = self.heap[p_index]
        return not parent_value < child_value

    def _shiftup(self, item_index):
        """Move the item at ``item_index`` towards the root until its parent is not smaller."""
        pos = item_index
        while pos:
            parent = self.find_p_index(pos)
            if self.valid_position(pos, parent):
                break
            self.heap[parent], self.heap[pos] = self.heap[pos], self.heap[parent]
            pos = parent

    def _shiftdown(self, item_index):
        """Move the item at ``item_index`` towards the leaves until no child is bigger."""
        last = len(self.heap) - 1
        pos = item_index
        while True:
            left, right = self.find_ch_index(pos)
            if left > last:
                return
            # With two children take the strictly bigger one; on a tie the
            # right child is chosen.
            if right > last or self.heap[left] > self.heap[right]:
                candidate = left
            else:
                candidate = right
            if self.valid_position(candidate, pos):
                return
            self.heap[pos], self.heap[candidate] = self.heap[candidate], self.heap[pos]
            pos = candidate

    def _take_root(self):
        """Remove and return the root, refilling it from the last slot (``size`` untouched)."""
        root = self.heap[0]  # IndexError on an empty heap, before anything is modified
        tail = self.heap.pop()
        if self.heap:
            self.heap[0] = tail
            self._shiftdown(0)
        return root

    def heappop(self):
        """Remove and return the largest item; raises IndexError when the heap is empty."""
        largest = self._take_root()
        self.size -= 1
        return largest

    def heappush(self, item):
        """Insert ``item`` at the bottom of the heap and sift it up into place."""
        self.heap.append(item)
        self._shiftup(len(self.heap) - 1)
        self.size += 1

    def heappushpop(self, item):
        """Insert ``item``, then remove and return the largest item; ``size`` is left as is."""
        self.heap.append(item)
        self._shiftup(len(self.heap) - 1)
        return self._take_root()

    def heapreplace(self, item):
        """Remove the current largest item, insert ``item``, and return the removed one.

        Raises IndexError when the heap is empty; ``size`` is left as is.
        """
        largest = self._take_root()
        self.heap.append(item)
        self._shiftup(len(self.heap) - 1)
        return largest
    

In [340]:
class HeapMM:
    """Running-median tracker driven by two caller-supplied heaps.

    The heaps are passed to every call rather than owned by the tracker.
    ``small_bag`` must expose its largest item at ``heap[0]`` and ``large_bag``
    its smallest item at ``heap[0]``; both must provide ``heappush(item)`` and
    ``heappushpop(item)``. For an even number of items the recorded median is
    the lower of the two middle values (the top of ``small_bag``).

    Attributes:
        balance: True when both bags held the same number of items after the
            most recent insertion.
        medians: one recorded median per inserted item, in insertion order.
            The list keeps growing across calls on the same tracker.
        sum_med: running sum of everything in ``medians``.
    """

    def __init__(self):
        self.balance = True
        self.medians = []
        self.sum_med = 0

    @staticmethod
    def take_smaller(a, b):
        """Return ``a`` when ``a < b``, otherwise ``b``."""
        if a < b:
            return a
        else:
            return b

    def medians_after_addition(self, large_bag, small_bag, list_number):
        """Insert every value of ``list_number`` and record the median after each one.

        Args:
            large_bag: min-oriented heap receiving the upper half of the values.
            small_bag: max-oriented heap receiving the lower half of the values.
            list_number: iterable of mutually comparable values that support ``+``.

        Returns:
            ``self.medians`` itself (not a copy).
        """
        for item in list_number:
            self._add_one_item(large_bag, small_bag, item)
        return self.medians

    def _add_one_item(self, large_bag, small_bag, item):
        """Insert ``item`` into one of the bags and record the new median.

        Every decision is taken from the bags' current contents (``len(heap)``
        and ``heap[0]``), never from previously recorded medians, so the same
        tracker can be pointed at a fresh pair of empty bags without corrupting
        the result. The bags are expected to be empty or in a state this method
        left them in (sizes differing by at most one).
        """
        n_small = len(small_bag.heap)
        n_large = len(large_bag.heap)

        if n_small == 0 and n_large == 0:
            # Very first item: it is its own median.
            small_bag.heappush(item)
            new_median = item
        elif n_small == n_large:
            # Equal sizes: the bag that receives the item becomes the bigger
            # one, and its top is the middle value.
            grown = large_bag if item > small_bag.heap[0] else small_bag
            grown.heappush(item)
            new_median = grown.heap[0]
        else:
            # One bag is ahead by one item: place the new item so that both
            # bags end up with the same size.
            max_in_small = small_bag.heap[0]
            if n_large > n_small:
                if item > max_in_small:
                    # Item belongs to the upper half; hand the upper half's
                    # smallest value down to the lower half.
                    small_bag.heappush(large_bag.heappushpop(item))
                else:
                    small_bag.heappush(item)
            else:
                if item < max_in_small:
                    # Item belongs to the lower half; hand the lower half's
                    # largest value up to the upper half.
                    large_bag.heappush(small_bag.heappushpop(item))
                else:
                    large_bag.heappush(item)
            new_median = self.take_smaller(small_bag.heap[0], large_bag.heap[0])

        self.balance = len(small_bag.heap) == len(large_bag.heap)
        self.medians.append(new_median)
        self.sum_med += new_median


In [341]:
# loading data
def load_data(file):
    """Read a text file holding one integer per line.

    Blank and whitespace-only lines (for example trailing empty lines at the
    end of the file) are skipped instead of aborting the load.

    Args:
        file: path of the file, passed straight to ``open``.

    Returns:
        list of int, in file order.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if a non-blank line is not a valid integer literal.
    """
    with open(file) as f:
        return [int(line) for line in f if line.strip()]

In [342]:
# Min-heap for the upper half of the values, max-heap for the lower half.
large_bag = HeapMin()
small_bag = HeapMax()
# Input file path, resolved against the current working directory;
# load_data parses each line as an integer.
file = 'Median.txt'
data = load_data(file)

In [343]:
# Running-median tracker; starts with an empty medians list and sum_med == 0.
heap_m = HeapMM()

In [344]:
# Feed every value through the tracker. `result` is heap_m.medians itself
# (same list object), holding one recorded median per input value.
result = heap_m.medians_after_addition(large_bag, small_bag, data)

In [345]:
# Sum of all medians recorded by the tracker so far.
print(heap_m.sum_med)

46831213


In [25]:
# Demonstration: reading slot 0 of an empty list raises IndexError.
# The error is caught and displayed so the cell does not abort "Run All".
a = []
try:
    print(a[0])
except IndexError as err:
    print("IndexError:", err)

IndexError: list index out of range