## 347. Top K Frequent Elements

* 時間複雜度: O( $n$ )
* 空間複雜度: O( $n$ )

In [1]:
from collections import defaultdict
from typing import List

class Solution:
    """Top-K frequent elements, solved with bucket sort on occurrence counts."""

    def topKFrequent(self, nums: List[int], k: int) -> List[int]:
        """Return up to ``k`` values from ``nums`` with the highest frequency.

        Values are returned from most frequent to least frequent; values with
        the same frequency keep the order in which they first appear in
        ``nums``.

        Args:
            nums: The input numbers.
            k: How many of the most frequent values to return.

        Returns:
            A list of at most ``k`` distinct values. If ``k`` is not positive
            or ``nums`` is empty the list is empty; if ``k`` exceeds the
            number of distinct values, every distinct value is returned.

        Time complexity is O(n) and extra space is O(n), n = len(nums).
        """
        # Nothing to select: always hand back a list rather than None.
        if k <= 0 or not nums:
            return []

        # Count how many times each number occurs.
        counts = defaultdict(int)  # space: O(m), m = number of distinct values
        for num in nums:  # time: O(n)
            counts[num] += 1

        # bucket[c] holds every number that occurs exactly c times. A number
        # can occur at most len(nums) times, hence len(nums) + 1 buckets.
        bucket = [[] for _ in range(len(nums) + 1)]  # space: O(n)
        for num, count in counts.items():  # time: O(m)
            bucket[count].append(num)

        # Walk the buckets from the highest frequency down, collecting values
        # until k of them have been gathered. Bucket 0 is always empty.
        result = []  # space: O(k)
        for i in range(len(bucket) - 1, 0, -1):  # time: O(n)
            for num in bucket[i]:
                result.append(num)
                if len(result) == k:
                    return result

        # Fewer than k distinct values exist: return everything collected
        # instead of implicitly returning None.
        return result

In [2]:
# Sample input: 3 occurs four times, 1 three times, 2 twice.
nums = [1, 1, 1, 2, 2, 3, 3, 3, 3]
k = 2
Solution().topKFrequent(nums, k)  # expected: [3, 1]

[3, 1]

* 時間複雜度: O( $n + m \log_2 k + k$ )
* 空間複雜度: O( $m + k$ )

可參考215. 手動建立heap

In [1]:
import heapq
from typing import List

class Solution:
    """Top-K frequent elements, solved with a size-bounded min-heap."""

    def topKFrequent(self, nums: List[int], k: int) -> List[int]:
        """Return up to ``k`` values from ``nums`` with the highest frequency.

        A min-heap of ``[count, num]`` entries is kept at size ``k`` at most;
        whenever a more frequent value shows up, the least frequent entry is
        evicted. The result follows the heap's internal order, so it is NOT
        sorted by frequency. Intermediate state is printed for illustration.

        Args:
            nums: The input numbers.
            k: How many of the most frequent values to return.

        Returns:
            A list of at most ``k`` distinct values; empty when ``k`` is not
            positive or ``nums`` is empty.

        Time complexity is O(n + m log k) and extra space is O(m + k), where
        n = len(nums) and m is the number of distinct values.
        """
        # Occurrence count per number.  space: O(m)
        count_dict = {}
        for num in nums:  # time: O(n)
            # Missing keys start from 0.
            count_dict[num] = count_dict.get(num, 0) + 1
        print(f"{count_dict=}")

        # Min-heap holding the (at most) k most frequent entries.  space: O(k)
        heap = []
        for num, count in count_dict.items():  # time: O(m)
            if len(heap) < k:
                # Heap not full yet: just add the entry.  time: O(log k)
                heapq.heappush(heap, [count, num])
                print(f"{heap=}")
            elif heap and count > heap[0][0]:
                # `heap` is empty only when k <= 0; checking it first avoids
                # an IndexError on heap[0] in that case.
                # More frequent than the current minimum: push the new entry,
                # then drop the smallest one.  time: O(log k) each
                heapq.heappush(heap, [count, num])
                print(f"{heap=}")
                popped = heapq.heappop(heap)
                print(f"{popped=}, {heap=}")

        # Extract the numbers from the surviving heap entries.  space: O(k)
        result = [num for _, num in heap]
        print(f"{result=}")

        return result

In [2]:
# Sample input: 3 occurs four times, 1 three times, 2 twice.
nums = [1, 1, 1, 2, 2, 3, 3, 3, 3]
k = 2
Solution().topKFrequent(nums, k)  # returns [1, 3]: heap order, not sorted by frequency

count_dict={1: 3, 2: 2, 3: 4}
heap=[[3, 1]]
heap=[[2, 2], [3, 1]]
heap=[[2, 2], [3, 1], [4, 3]]
popped=[2, 2], heap=[[3, 1], [4, 3]]
result=[1, 3]


[1, 3]