# Heaps, Union-Find, Bloom/Count-Min
Objectives:
- Binary heap ops (push/pop) complexity demo
- Union-Find with path compression and union by rank
- Bloom filter / Count-Min Sketch accuracy experiment

In [None]:
# Starter imports
import heapq
import random
from collections import defaultdict
from pathlib import Path

# Heap demos, union-find, and sketch experiments are implemented in the cells below.

In [None]:
import heapq, random, timeit

def heap_demo(n=10):
    """Heap-sort ``n`` random integers and return them in ascending order.

    Draws ``n`` integers from 1..100 (both ends inclusive), turns the list
    into a min-heap in place, then pops until the heap is empty.

    Args:
        n: how many random integers to generate.

    Returns:
        A list of the ``n`` generated integers, smallest first.
    """
    heap = []
    for _ in range(n):
        heap.append(random.randint(1, 100))
    heapq.heapify(heap)

    drained = []
    while heap:
        # heappop always removes the current minimum, so the output is sorted.
        drained.append(heapq.heappop(heap))
    return drained

def heap_push_pop_perf(n=10000, heap_size=0):
    """Time ``n`` heappush/heappop pairs against a heap of a chosen size.

    Each timed iteration pushes one fresh random float and then pops the
    minimum, so the heap returns to ``heap_size`` elements after every pair.

    Args:
        n: number of push+pop pairs to time.
        heap_size: number of random floats preloaded into the heap before
            timing starts. With the default of 0 the heap is empty before
            every push, so each pair only ever touches a single element;
            pass a larger value to observe how the cost grows with heap size.

    Returns:
        Total elapsed time in seconds for all ``n`` pairs, as reported by
        ``timeit.timeit``. The figure includes the cost of generating the
        random values.
    """
    heap = [random.random() for _ in range(heap_size)]
    heapq.heapify(heap)

    def push_then_pop():
        heapq.heappush(heap, random.random())
        heapq.heappop(heap)

    return timeit.timeit(push_then_pop, number=n)

# Run both demos; as the cell's final expression this evaluates to a tuple of
# (ascending list of popped values, elapsed seconds for 5000 push/pop pairs).
heap_demo(), heap_push_pop_perf(5000)

In [None]:
class UnionFind:
    """Disjoint-set forest over the integers ``0 .. n-1``.

    Combines union by rank with path compression.
    """

    def __init__(self, n):
        """Create ``n`` singleton sets, one for each element ``0 .. n-1``."""
        # parent[i] == i marks i as the root (representative) of its set.
        self.parent = list(range(n))
        # rank[r] is an upper bound on the height of the tree rooted at r.
        self.rank = [0] * n

    def find(self, x):
        """Return the representative of the set containing ``x``.

        Every node visited on the way to the root is re-pointed directly at
        the root (path compression).

        Raises:
            IndexError: if ``x`` is outside ``0 .. n-1``. Negative values are
                rejected explicitly because list indexing would otherwise
                silently treat them as offsets from the end.
        """
        size = len(self.parent)
        if not 0 <= x < size:
            raise IndexError(
                f"element {x!r} is out of range for UnionFind of size {size}"
            )

        # First pass: walk up to the root.
        root = x
        while self.parent[root] != root:
            root = self.parent[root]

        # Second pass: point every node on the path straight at the root.
        while self.parent[x] != root:
            next_up = self.parent[x]
            self.parent[x] = root
            x = next_up
        return root

    def union(self, a, b):
        """Merge the sets containing ``a`` and ``b``.

        Returns:
            True if two distinct sets were merged, False if ``a`` and ``b``
            were already in the same set.

        Raises:
            IndexError: if ``a`` or ``b`` is outside ``0 .. n-1``.
        """
        ra, rb = self.find(a), self.find(b)
        if ra == rb:
            return False
        # Keep ``ra`` as the root of higher (or equal) rank so the
        # lower-rank tree is attached beneath it.
        if self.rank[ra] < self.rank[rb]:
            ra, rb = rb, ra
        self.parent[rb] = ra
        # Only a merge of equal-rank trees can increase the height.
        if self.rank[ra] == self.rank[rb]:
            self.rank[ra] += 1
        return True

# Start with five singleton sets, then merge them into two groups:
# {0, 1, 2} and {3, 4}.
uf = UnionFind(5)
uf.union(0,1); uf.union(1,2); uf.union(3,4)
# Map every element to its set representative; members of the same group
# share the same root.
{i: uf.find(i) for i in range(5)}

In [None]:
import math, hashlib

class BloomFilter:
    """Bloom filter whose bit array is stored in a single Python integer."""

    def __init__(self, m: int, k: int):
        """Create an empty filter.

        Args:
            m: number of bit positions in the filter.
            k: number of hash positions set/tested per item.

        Raises:
            ValueError: if ``m`` or ``k`` is not positive. ``m == 0`` would
                make every hash computation divide by zero, and ``k <= 0``
                would make every membership test succeed.
        """
        if m <= 0:
            raise ValueError(f"m must be a positive number of bits, got {m!r}")
        if k <= 0:
            raise ValueError(f"k must be a positive number of hashes, got {k!r}")
        # Bit i of this integer is 1 once some added item hashed to position i.
        self.bits = 0
        self.m = m
        self.k = k

    def _hashes(self, item: str):
        """Yield the ``k`` bit positions (each in ``0 .. m-1``) for ``item``."""
        for i in range(self.k):
            # Appending the index i to the item gives k different inputs to
            # the same hash function; each 4-byte digest is reduced modulo m.
            h = hashlib.blake2b(f"{item}:{i}".encode(), digest_size=4).digest()
            yield int.from_bytes(h, "big") % self.m

    def add(self, item: str) -> None:
        """Insert ``item`` by setting all of its bit positions."""
        for h in self._hashes(item):
            self.bits |= (1 << h)

    def __contains__(self, item: str) -> bool:
        """Return True if every bit position of ``item`` is set.

        An added item always tests True. An item that was never added can
        also test True when other items happen to have set all of its bits.
        """
        return all(self.bits & (1 << h) for h in self._hashes(item))

# Small filter: 64 bit positions, 3 hash positions per item.
bf = BloomFilter(m=64, k=3)
for word in ["apple", "banana", "cherry"]:
    bf.add(word)
# "apple" was added, so the first test is True; "grape" was not, so the second
# is False unless all three of its bit positions happen to be set already.
"apple" in bf, "grape" in bf

In [None]:
class CountMinSketch:
    """Count-Min Sketch: a ``depth`` x ``width`` table of counters."""

    def __init__(self, width=64, depth=4):
        """Create a sketch with all counters at zero.

        Args:
            width: number of counters per row.
            depth: number of rows; each row uses its own hash of the key.

        Raises:
            ValueError: if ``width`` or ``depth`` is not positive. A zero
                width would make every hash computation divide by zero, and
                a zero depth would leave ``estimate`` with no counters to
                take the minimum of.
        """
        if width <= 0:
            raise ValueError(f"width must be positive, got {width!r}")
        if depth <= 0:
            raise ValueError(f"depth must be positive, got {depth!r}")
        self.width = width
        self.depth = depth
        # One independent list per row (not the same list repeated).
        self.table = [[0] * width for _ in range(depth)]

    def _hashes(self, key):
        """Yield one column index (in ``0 .. width-1``) per row for ``key``."""
        for i in range(self.depth):
            # Appending the row index gives each row a different hash input;
            # each 4-byte digest is reduced modulo the row width.
            h = hashlib.blake2b(f"{key}:{i}".encode(), digest_size=4).digest()
            yield int.from_bytes(h, "big") % self.width

    def add(self, key, count=1):
        """Add ``count`` to the counter selected for ``key`` in every row."""
        for row, h in enumerate(self._hashes(key)):
            self.table[row][h] += count

    def estimate(self, key):
        """Return the smallest of the counters selected for ``key``.

        Other keys hashing to the same counter can only raise it (as long as
        all added counts are non-negative), so the minimum across rows is the
        least-inflated value.
        """
        return min(self.table[row][h] for row, h in enumerate(self._hashes(key)))

# Default sketch (width=64, depth=4) fed a tiny stream: "a" x3, "b" x2, "c" x1.
cms = CountMinSketch()
for word in ["a", "b", "a", "c", "a", "b"]:
    cms.add(word)
# Each estimate is at least the true count (3, 2, 1, and 0 for the unseen "z");
# it is higher only if another key shares a counter with it in every row.
cms.estimate("a"), cms.estimate("b"), cms.estimate("c"), cms.estimate("z")

Notes:
- Heaps give O(log n) push/pop; great for priority queues and streaming top-k.
- Union-Find enables near-constant amortized merges/queries for connectivity problems.
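- Bloom filters and Count-Min Sketches trade exactness for sub-linear memory, and their errors are one-sided: a Bloom filter can report false positives but never false negatives, and a Count-Min Sketch can overestimate a count but never underestimate it (given non-negative updates).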