# HEAPS


## K Most Frequent Strings - Medium


In [None]:
from collections import Counter
import heapq
from typing import List


class Pair:
    """A (string, frequency) record ordered for a heapq-based max-heap.

    heapq always pops the item that compares smallest, so ``__lt__`` is
    inverted on frequency: the pair with the HIGHER frequency compares as
    "smaller" and therefore surfaces first. Pairs with equal frequency
    fall back to ascending lexicographic order of their strings.
    """

    def __init__(self, str, freq):
        # NOTE: the parameter name shadows the builtin ``str``; it is kept
        # because it is part of the constructor's keyword interface.
        self.str, self.freq = str, freq

    def __lt__(self, other):
        # Different frequencies: the larger one wins (goes nearer the top).
        if self.freq != other.freq:
            return self.freq > other.freq
        # Same frequency: the lexicographically smaller string wins.
        return self.str < other.str

    def __repr__(self):
        # Show both attributes, e.g. "(go, 4)".
        return "({}, {})".format(self.str, self.freq)


def k_most_frequent_strings(strs: List[str], k: int) -> List[str]:
    """Return up to ``k`` strings from ``strs``, popped from a heap of Pairs.

    Every distinct string is wrapped in a ``Pair`` together with its
    occurrence count, the list is heapified, and the top entries are popped
    one by one. The order of the result is whatever ``Pair.__lt__`` defines
    as "smallest first".

    Args:
        strs: The input strings (duplicates expected).
        k: Maximum number of strings to return.

    Returns:
        A list of at most ``k`` strings. If ``k`` exceeds the number of
        distinct strings, all distinct strings are returned; if ``k`` is
        zero or negative, the list is empty.
    """
    # get lookup table: string -> number of occurrences
    freqs = Counter(strs)
    # create a list for heap using pair class
    max_heap = [Pair(word, count) for word, count in freqs.items()]
    print(max_heap)  # before max heap
    heapq.heapify(max_heap)
    print(max_heap)  # after max heap
    # Never pop more entries than the heap holds: heappop raises IndexError
    # on an empty heap, which previously happened whenever k was larger
    # than the number of distinct strings.
    pops = min(k, len(max_heap))
    return [heapq.heappop(max_heap).str for _ in range(pops)]


# Sample input with repeated words.
# Alternative smaller input: strs = ["coding", "interview"]
strs = (
    "go coding byte byte go interview go python go python c++ c++ c++ rust rust"
).split()
k = 2

# Last expression of the cell, so the notebook displays the returned list.
k_most_frequent_strings(strs, k)

[(go, 4), (coding, 1), (byte, 2), (interview, 1), (python, 2), (c++, 3), (rust, 2)]
[(go, 4), (python, 2), (c++, 3), (interview, 1), (coding, 1), (byte, 2), (rust, 2)]


['go', 'c++']

In [None]:
# Lay a list out as a max-heap using heapq, which only builds min-heaps:
# negate every value, heapify the negated values, then negate them back.
a_list = [30, 16, 8, 10, 15, 50, 60]

# Min-heap over the negated values == max-heap over the original values.
b_list = [-value for value in a_list]
heapq.heapify(b_list)

# Undo the negation while keeping the heap layout.
a_list = [-value for value in b_list]

print(a_list)

[60, 16, 50, 10, 15, 30, 8]


In [None]:
# k most frequent strings using a min heap that never holds more than k elements
from collections import Counter
import heapq
from typing import List


class Pair:
    """A (string, frequency) record ordered for a heapq min-heap.

    The pair with the LOWER frequency compares as smaller and so sits at
    the top of the heap. Pairs with equal frequency are ordered so that the
    lexicographically LARGER string compares as smaller.
    """

    def __init__(self, str, freq):
        # NOTE: the parameter name shadows the builtin ``str``; it is kept
        # because it is part of the constructor's keyword interface.
        self.str, self.freq = str, freq

    # tells if self has to be placed on top of min heap
    def __lt__(self, other):
        # Different frequencies: the smaller one goes nearer the top.
        if self.freq != other.freq:
            return self.freq < other.freq
        # Same frequency: the lexicographically larger string goes first.
        return self.str > other.str

    def __repr__(self):
        # Show both attributes, e.g. "(go, 4)".
        return "({}, {})".format(self.str, self.freq)


def k_most_frequent_strings_min_heap(strs: List[str], k: int) -> List[str]:
    """Return up to ``k`` strings from ``strs`` using a size-bounded heap.

    Each distinct string is pushed as a ``Pair`` with its occurrence count.
    Whenever the heap grows beyond ``k`` entries, its top entry (the
    "smallest" according to ``Pair.__lt__``) is discarded, so the heap only
    ever retains the ``k`` largest pairs. The survivors are then popped and
    reversed, giving them from largest to smallest.

    Args:
        strs: The input strings (duplicates expected).
        k: Maximum number of strings to return.

    Returns:
        A list of at most ``k`` strings. If ``k`` exceeds the number of
        distinct strings, all distinct strings are returned; if ``k`` is
        zero or negative, the list is empty.
    """
    freqs = Counter(strs)
    min_heap = []
    for word, count in freqs.items():
        heapq.heappush(min_heap, Pair(word, count))
        # Keep at most k entries by evicting the current smallest pair.
        if len(min_heap) > k:
            heapq.heappop(min_heap)

    # Drain exactly what the heap holds. Popping a fixed k times raised
    # IndexError when there were fewer than k distinct strings.
    res = [heapq.heappop(min_heap).str for _ in range(len(min_heap))]
    # Pops come out smallest-first; reverse to get largest-first.
    res.reverse()
    return res


# Sample input with repeated words.
# Alternative smaller input: strs = ["coding", "interview"]
strs = (
    "go coding byte byte go interview go python go python c++ c++ c++ rust rust"
).split()
k = 2

# Last expression of the cell, so the notebook displays the returned list.
k_most_frequent_strings_min_heap(strs, k)

['c++', 'go']

```yaml
# First: "go": 3
pair = Pair("go", 3)
# No comparisons - first element

# Second: "coding": 1
pair = Pair("coding", 1)
# heapq calls __lt__: Is Pair("coding", 1) < Pair("go", 3)?
# frequencies different: 1 < 3 is True
# "coding" moves BEFORE "go" because 1 < 3
heap = [Pair("coding", 1), Pair("go", 3)]

# Third: "byte": 2
pair = Pair("byte", 2)
# Is Pair("byte", 2) < Pair("coding", 1)?
# frequencies different: 2 < 1 is False, so "byte" doesn't go before "coding"
# Is Pair("byte", 2) < Pair("go", 3)?
# frequencies different: 2 < 3 is True, so "byte" goes before "go"
heap = [Pair("coding", 1), Pair("byte", 2), Pair("go", 3)]

# Fourth: "interview": 1
pair = Pair("interview", 1)
# Is Pair("interview", 1) < Pair("coding", 1)?
# frequencies equal: "interview" > "coding" is True, so __lt__ returns True
# so "interview" comes BEFORE "coding"
heap = [Pair("interview", 1), Pair("coding", 1), Pair("byte", 2), Pair("go", 3)]
```


Let me create a detailed visualization for how the min-heap gets built with your input, focusing on each `__lt__` comparison that happens. I'll mark each TRUE/FALSE with what it means for heap ordering.

```python
# Start with empty heap: []

# 1. Add "go" (freq=3)
# First element, no comparisons needed
heap = [("go", 3)]

# 2. Add "coding" (freq=1)
# heapq asks: Is coding < go?
__lt__(coding, go):
    coding.freq < go.freq  # 1 < 3
    returns True  # "True means coding SHOULD GO BEFORE go"
heap = [("coding", 1), ("go", 3)]

# 3. Add "byte" (freq=2)
# heapq asks: Is byte < coding?
__lt__(byte, coding):
    byte.freq < coding.freq  # 2 < 1
    returns False  # "False means byte should NOT GO BEFORE coding"
# heapq then asks: Is byte < go?
__lt__(byte, go):
    byte.freq < go.freq  # 2 < 3
    returns True  # "True means byte SHOULD GO BEFORE go"
heap = [("coding", 1), ("byte", 2), ("go", 3)]

# 4. Add "interview" (freq=1)
# heapq asks: Is interview < coding?
__lt__(interview, coding):
    interview.freq == coding.freq  # 1 == 1
    interview.str > coding.str  # "interview" > "coding"
    returns True  # "True means interview SHOULD GO BEFORE coding"
heap = [("interview", 1), ("coding", 1), ("byte", 2), ("go", 3)]
```

The key insight is:

- When `__lt__` returns True, heapq puts that element BEFORE the one it's being compared with
- When `__lt__` returns False, heapq puts that element AFTER the one it's being compared with

In a min-heap:

1. For frequencies: we want smaller frequencies first, so we use `<`

   - If freq1 < freq2 returns True → freq1 goes before freq2
   - Example: 1 < 3 is True, so freq=1 goes before freq=3

2. For equal frequencies: we're using `>` for strings
   - If str1 > str2 returns True → str1 goes before str2
   - Example: "interview" > "coding" is True, so "interview" goes before "coding"

This creates a min-heap ordered by:

1. Smallest frequencies at the top
2. For equal frequencies, lexicographically larger strings at the top

The final heap order: ["interview"(1), "coding"(1), "byte"(2), "go"(3)]

Does this help clarify how the comparisons determine the heap ordering?


In [12]:
"interview" > "coding"

True

## Combine Sorted Linked Lists - Medium


## Median of an Integer Stream - Hard


## Sort a K-Sorted Array - Medium
