### 215. Kth Largest Element in an Array

### Heap 手動

* 時間複雜度: O( $n \log_2 k$ )  
  其中 $n$ 為 nums 的長度。
  * heapify的時間複雜度 O( $k$ )
    * 節點數量分佈
      * 深度為 $d$ 的節點數量約為 $2^d$
    * 操作次數分佈
      * 最大深度為 $\log_2 k$
      * 深度 $d$ 的節點，進行`sift down`所需的最多次數為 $(\log_2 k) - d$
    * 總時間複雜度
      $$
      \text{總成本} = \sum_{d=0}^{\log_2 k} (\text{節點數量} \times \text{操作次數})
      = \sum_{d=0}^{\log_2 k} \left(2^d \times (\log_2 k - d)\right)
      $$
      令 $h = \log_2 k$，此和為 $2^{h+1} - h - 2 < 2k$，故約等於 O( $k$ )
* 空間複雜度: O( $k$ )

In [3]:
from typing import List

class Solution:
    """Find the k-th largest element with a hand-written size-k min heap.

    Every helper prints a step-by-step trace of the heap so the algorithm can
    be followed in the notebook output.
    """

    def findKthLargest(self, nums: List[int], k: int) -> int:
        """Return the k-th largest value in ``nums``.

        A min heap holding the k largest values seen so far is maintained;
        once every value has been processed its root is the answer.

        Time: O(k) to heapify + O((n - k) * log k) for the scan, i.e.
        O(n * log k) because n >= k. Space: O(k) for the heap.
        """
        print(f"{nums=}")

        heap = nums[:k]  # space: O(k)
        self.heapify(heap)  # time: O(k)

        # time: O((n - k) * log k)
        for num in nums[k:]:  # time: O(n - k)
            # Only a value larger than the root (the current k-th largest)
            # can displace it.
            if num > heap[0]:
                self.heappop(heap)  # time: O(log k)
                self.heappush(heap, num)  # time: O(log k)

        return heap[0]

    def heapify(self, heap):
        """Rearrange ``heap`` in place into a min heap.

        Time: O(k), where k is the length of ``heap``.
        """
        print(f"\n<< heapify >>\n")

        heap_size = len(heap)
        print(f"{heap=}, {heap_size=}, last non-leaf node: {heap_size//2 - 1}")

        # Start at the last non-leaf node, i.e. the parent of the last index:
        # ((heap_size - 1) - 1) // 2 == heap_size // 2 - 1, and sift each
        # node down while walking back to the root.
        for position in range((heap_size // 2 - 1), -1, -1):  # time: O(k)
            print("_" * 100)
            print(f"-> {heap=}, {position=}")
            self._sift_down(heap, position, heap_size)

    def heappop(self, heap):
        """Remove and return the smallest value, or ``None`` if ``heap`` is empty.

        Time: O(log k), where k is the length of ``heap`` (the height of a
        complete binary tree is logarithmic in its node count).
        """
        print(f"\n<< heappop >>\n")

        if not heap:
            return None

        min_value = heap[0]  # keep the minimum so it can be returned
        print(f"{min_value=}")

        # Move the last value to the root so the tree stays complete.
        heap[0] = heap[-1]
        print(f" (heap[0]={min_value}) change to (heap[-1]={heap[-1]}) -> {heap=}")

        # Capture the removed tail *before* reporting it: indexing heap[-1]
        # after the pop raises IndexError when the heap held one element.
        last = heap.pop()
        print(f"-> pop: heap[-1]={last!r} -> {heap=}")

        # Only the root can violate the heap property now; nothing to do if
        # the heap became empty.
        if heap:
            self._sift_down(heap, position=0, heap_size=len(heap))

        return min_value

    def heappush(self, heap, num):
        """Insert ``num`` into the min heap.

        Time: O(log k), where k is the length of ``heap`` (the height of a
        complete binary tree is logarithmic in its node count).
        """
        print(f"\n<< heappush >>\n")

        # Append the new value as the last leaf ...
        heap.append(num)
        print(f"-> push: {num=} -> {heap=}")

        # ... then sift it up from that last index.
        self._sift_up(heap, position=len(heap) - 1)

    def _sift_down(self, heap, position, heap_size):
        """Move the value at ``position`` down until neither child is smaller."""
        item = heap[position]  # value being placed
        print(f"\n--> {item=}\n")
        while position < heap_size:
            # Indices of the left and right children.
            left = 2 * position + 1
            right = 2 * position + 2
            print(f"--> {position=}, {left=}, {right=}")

            # No left child means this node is a leaf.
            if left >= heap_size:
                print(f"--> ({left=}) >= ({heap_size=}) -> break (reached leaf node)")
                break

            # Pick the smaller of the two children (the left one if there is
            # no right child).
            if (right < heap_size) and (heap[left] > heap[right]):
                smaller = right
                print(f"--> ({heap[left]=} > {heap[right]=}) -> {smaller=}")
            else:
                smaller = left
                if right >= heap_size:
                    print(f"--> ({right=} >= {heap_size=}) -> {smaller=}")
                else:
                    print(f"--> ({heap[left]=} <= {heap[right]=}) -> {smaller=}")

            # If the value is not larger than its smaller child, the subtree
            # below is already a min heap.
            if item <= heap[smaller]:
                print(f"--> ({item=} <= {heap[smaller]=}) -> break (already min heap)")
                break

            print(f"--> (heap[{position}]={heap[position]}) change to (heap[{smaller}]={heap[smaller]})")
            heap[position] = heap[smaller]  # move the smaller child up
            position = smaller  # continue from the child's slot
            print(f"--> {heap=}, {position=}")
            print("-" * 100)

        heap[position] = item  # drop the value into its final slot
        print(f"--> (heap[{position}]={heap[position]}) change to ({item=})")
        print(f"--> {heap=}")

    def _sift_up(self, heap, position):
        """Move the value at ``position`` up until its parent is not larger."""
        item = heap[position]  # value being placed
        print(f"\n--> {item=}\n")

        while position > 0:
            parent = (position - 1) // 2  # index of the parent node
            print(f"--> {position=}, {parent=}")

            # A parent that is not larger means the heap property holds.
            if heap[parent] <= item:
                print(f"--> ({heap[parent]=} <= {item=}) -> break (already min heap)")
                break

            # Move the parent down and continue from its slot.
            print(f"--> (heap[{position}]={heap[position]}) change to (heap[{parent}]={heap[parent]})")
            heap[position] = heap[parent]
            position = parent
            print(f"--> {heap=}, {position=}")
            print("-" * 100)

        heap[position] = item  # drop the value into its final slot


In [2]:
# Demo: 4th largest value of the sample list, traced through the manual heap.
nums = [3,2,3,2,1,4,5,6,5]
k = 4
Solution().findKthLargest(nums, k) # expected result: 4

nums=[3, 2, 3, 2, 1, 4, 5, 6, 5]

<< heapify >>

heap=[3, 2, 3, 2], heap_size=4, last non-leaf node: 1
____________________________________________________________________________________________________
-> heap=[3, 2, 3, 2], position=1

--> item=2

--> position=1, left=3, right=4
--> (right=4 >= heap_size=4) -> smaller=3
--> (item=2 <= heap[smaller]=2) -> break (already min heap)
--> (heap[1]=2) change to (item=2)
--> heap=[3, 2, 3, 2]
____________________________________________________________________________________________________
-> heap=[3, 2, 3, 2], position=0

--> item=3

--> position=0, left=1, right=2
--> (heap[left]=2 <= heap[right]=3) -> smaller=1
--> (heap[0]=3) change to (heap[1]=2)
--> heap=[2, 2, 3, 2], position=1
----------------------------------------------------------------------------------------------------
--> position=1, left=3, right=4
--> (right=4 >= heap_size=4) -> smaller=3
--> (heap[1]=2) change to (heap[3]=2)
--> heap=[2, 2, 3, 2], position=3
------------

4

### Heap package

In [None]:
from typing import List
import heapq

class Solution:
    """K-th largest element via a size-k min heap built with ``heapq``."""

    def findKthLargest(self, nums: List[int], k: int) -> int:
        """Return the k-th largest value in ``nums``, printing each heap update.

        The first ``k`` values seed a min heap; every later value that beats
        the root replaces it, so the root ends up as the k-th largest.
        """
        print(f"{nums=}, {k=}")

        # Seed the heap with a copy of the first k values.
        heap = nums[:k]
        heapq.heapify(heap)
        print(f"{nums[:k]=}")
        print(f"{heap=}\n")

        remaining = nums[k:]
        for candidate in remaining:
            # Values that do not beat the current k-th largest are ignored.
            if not candidate > heap[0]:
                continue

            evicted = heapq.heappop(heap)
            heapq.heappush(heap, candidate)

            print(f"-> ({candidate} > {evicted}) -> pop: {evicted}, push: {candidate}")
            print(f"-> {heap=}\n")

        return heap[0]

In [None]:
# Demo: 4th largest value of the sample list using the heapq-based solution.
nums = [3,2,3,1,2,4,5,5,6]
k = 4
Solution().findKthLargest(nums, k) # expected result: 4

### Sort

#### 1.先由大到小排序
#### 2.抓出新排序的第k個值

In [None]:
class Solution:
    """K-th largest element by sorting in descending order."""

    def findKthLargest(self, nums, k):
        """Return the k-th largest value (1-based ``k``) of ``nums``.

        ``sorted`` builds a new list, so the caller's ``nums`` keeps its
        original order instead of being reordered in place.
        """
        descending = sorted(nums, reverse=True)
        return descending[k - 1]

# Demo: 2nd largest value of the sample list; expected result: 5.
Solution().findKthLargest(nums = [3,2,3,1,2,4,5,5,6], k = 2)

#### 1. 合併排序法
#### 2.抓出新排序的第k個值

In [None]:
class Solution:
    """K-th largest element via a hand-written merge sort."""

    def findKthLargest(self, nums, k):
        """Return the k-th largest value (1-based ``k``) of ``nums``.

        ``nums`` is merge-sorted in ascending order, so the answer is the
        k-th element counted from the end of the sorted result.
        """

        def merge_sort(values):
            """Return the items of ``values`` in ascending order."""
            if len(values) <= 1:
                return values

            mid = len(values) // 2
            return merge(merge_sort(values[:mid]), merge_sort(values[mid:]))

        def merge(left, right):
            """Merge two ascending sequences into one ascending list."""
            merged = []
            i = j = 0
            # Walk both inputs with indices: popping from the front of a
            # list shifts every remaining element on each step.
            while i < len(left) and j < len(right):
                if left[i] < right[j]:
                    merged.append(left[i])
                    i += 1
                else:
                    merged.append(right[j])
                    j += 1

            # At most one side still has items; they are already sorted.
            merged.extend(left[i:])
            merged.extend(right[j:])
            return merged

        return merge_sort(nums)[-k]

# Demo: 2nd largest value of the sample list; expected result: 5.
Solution().findKthLargest(nums = [3,2,3,1,2,4,5,5,6], k = 2)