The median is the middle value in an ordered integer list. If the size of the list is even, there is no single middle value, so the median is the mean of the two middle values.
For example,

[2,3,4], the median is 3

[2,3], the median is (2 + 3) / 2 = 2.5
Design a data structure that supports the following two operations:

    void addNum(int num) - Add an integer number from the data stream to the data structure.
    double findMedian() - Return the median of all elements so far.

 

Example:

addNum(1)
addNum(2)
findMedian() -> 1.5
addNum(3) 
findMedian() -> 2

 

Follow up:

    If all integer numbers from the stream are between 0 and 100, how would you optimize it?
    If 99% of all integer numbers from the stream are between 0 and 100, how would you optimize it?

# Simple Sorting - O(n*log n) addNum, O(1) findMedian runtime, O(n) space

In [1]:
class MedianFinder:
    """Running-median tracker that stores every number in one sorted list.

    ``addNum`` appends the value and re-sorts the list; ``findMedian`` reads
    the middle element(s) directly by index.
    """

    def __init__(self):
        """Start with an empty stream."""
        # Invariant: sorted in ascending order after every addNum call.
        self.lst = []

    def addNum(self, num: int) -> None:
        """Add ``num`` to the stream, keeping ``self.lst`` sorted."""
        self.lst.append(num)
        # Sort in place rather than building a brand-new list on every call.
        self.lst.sort()

    def findMedian(self) -> float:
        """Return the median of all numbers added so far, always as a float.

        Raises:
            IndexError: If no number has been added yet.
        """
        length = len(self.lst)
        if not length:
            raise IndexError("findMedian() called on an empty MedianFinder")

        mid = length // 2
        if length % 2:
            # Odd size: a single middle element. Convert so the return type
            # matches the annotation (and the even-size branch).
            return float(self.lst[mid])
        # Even size: mean of the two middle elements.
        return (self.lst[mid - 1] + self.lst[mid]) / 2

# Your MedianFinder object will be instantiated and called as such:
# obj = MedianFinder()
# obj.addNum(num)
# param_2 = obj.findMedian()

# Optimization if 99-100% of the numbers are between 0 and 100 - Dictionary of counts - O(1) addNum, O(k log k) findMedian runtime (it sorts the unique numbers), O(k) space where k is the number of unique numbers

In [2]:
class MedianFinder:
    """Running-median tracker that stores a count per distinct number.

    ``addNum`` only bumps a counter; ``findMedian`` walks the distinct
    numbers in ascending order, accumulating counts until the middle
    rank(s) are reached.
    """

    def __init__(self):
        """Start with an empty stream."""
        # Maps each distinct number to how many times it has been added.
        self.num_dict = dict()

    def addNum(self, num: int) -> None:
        """Record one more occurrence of ``num``."""
        self.num_dict[num] = self.num_dict.get(num, 0) + 1

    def findMedian(self) -> float:
        """Return the median of all numbers added so far, always as a float.

        Raises:
            IndexError: If no number has been added yet.
        """
        total = sum(self.num_dict.values())
        if not total:
            raise IndexError("findMedian() called on an empty MedianFinder")

        # 1-indexed ranks of the two middle elements in sorted order.
        # For an odd total both ranks coincide.
        lower_rank = (total + 1) // 2
        upper_rank = total // 2 + 1

        lower = upper = None
        seen = 0  # how many elements are <= the current value
        for value in sorted(self.num_dict):
            seen += self.num_dict[value]
            if lower is None and seen >= lower_rank:
                lower = value
            if seen >= upper_rank:
                upper = value
                break

        # ``seen`` reaches ``total`` on the last key and total >= upper_rank,
        # so both middles are always set once the stream is non-empty.
        return (lower + upper) / 2

# Your MedianFinder object will be instantiated and called as such:
# obj = MedianFinder()
# obj.addNum(num)
# param_2 = obj.findMedian()

# Binary Search and Insert - O(n) addNum, O(1) findMedian runtime, O(n) space

In [3]:
class MedianFinder:
    """Running-median tracker backed by a list kept sorted on insertion.

    ``addNum`` locates the insertion point with a binary search and inserts
    there; ``findMedian`` reads the middle element(s) directly by index.
    """

    def __init__(self):
        """Start with an empty stream."""
        # Invariant: sorted in ascending order at all times.
        self.lst = []

    def binary_search(self, num: int) -> int:
        """Return an index where ``num`` can be inserted keeping the list sorted.

        If the search lands on an element equal to ``num``, that element's
        index is returned immediately.
        """
        left, right = 0, len(self.lst)

        while left < right:
            mid = (left + right) // 2
            pivot = self.lst[mid]
            if num == pivot:
                return mid
            if num < pivot:
                right = mid
            else:
                left = mid + 1

        # The window is empty here (left == right): that is the insert slot.
        return left

    def addNum(self, num: int) -> None:
        """Insert ``num`` at its sorted position."""
        self.lst.insert(self.binary_search(num), num)

    def findMedian(self) -> float:
        """Return the median of all numbers added so far, always as a float.

        Raises:
            IndexError: If no number has been added yet.
        """
        length = len(self.lst)
        if not length:
            raise IndexError("findMedian() called on an empty MedianFinder")

        mid = length // 2
        if length % 2:
            # Odd size: a single middle element. Convert so the return type
            # matches the annotation (and the even-size branch).
            return float(self.lst[mid])
        # Even size: mean of the two middle elements.
        return (self.lst[mid - 1] + self.lst[mid]) / 2

# Your MedianFinder object will be instantiated and called as such:
# obj = MedianFinder()
# obj.addNum(num)
# param_2 = obj.findMedian()

# Two heaps - O(log n) addNum, O(1) findMedian runtime, O(n) space

In [4]:
import heapq

class MedianFinder:
    """Running-median tracker built on two heaps.

    ``lo`` holds the smaller half of the numbers and ``hi`` the larger half.
    ``lo`` is allowed to hold at most one more element than ``hi``, so the
    median is always available from the heap tops.
    """

    def __init__(self):
        """Start with an empty stream."""
        self.lo = []  # max heap of the lower half (values stored negated)
        self.hi = []  # min heap of the upper half

    def addNum(self, num: int) -> None:
        """Add ``num`` to the stream and rebalance the two heaps."""
        # Route the new number through ``lo`` and hand that heap's largest
        # value to ``hi``; this keeps every element of ``lo`` <= every
        # element of ``hi``. heappushpop is a push followed by a pop.
        largest_low = -heapq.heappushpop(self.lo, -num)
        heapq.heappush(self.hi, largest_low)

        # Maintain the size property: ``lo`` is never smaller than ``hi``.
        if len(self.lo) < len(self.hi):
            heapq.heappush(self.lo, -heapq.heappop(self.hi))

    def findMedian(self) -> float:
        """Return the median of all numbers added so far, always as a float.

        Raises:
            IndexError: If no number has been added yet.
        """
        if not self.lo:
            raise IndexError("findMedian() called on an empty MedianFinder")

        if len(self.lo) > len(self.hi):
            # Odd count: the extra element on top of ``lo`` is the median.
            return float(-self.lo[0])

        # Even count: mean of the two heap tops (``lo`` stores negated values).
        return (self.hi[0] - self.lo[0]) / 2

# Your MedianFinder object will be instantiated and called as such:
# obj = MedianFinder()
# obj.addNum(num)
# param_2 = obj.findMedian()