In [None]:

"""
Optimal 3-bucket representation for month names: why write out all 12 months
when you can decompose them into word parts and store those parts compactly?
Includes timing, an ETA, and a tqdm progress bar for Google Colab.
Sadly, "optimal" also means "use a supercomputer". Or, the better alternative meaning: "write way better code".
"""
import time
from itertools import combinations
from tqdm.notebook import tqdm

# Start Timer (the total below therefore includes split generation time)
t0_total = time.time()
MONTHS = [
    "january", "february", "march", "april",
    "may", "june", "july", "august",
    "september", "october", "november", "december",
]

# 1 Generate every legal split for every month
# A pattern names the bucket each piece of a split is stored in, in order.
PATTERNS_2 = [("1", "2"), ("2", "3"), ("1", "3")]
PATTERNS_3 = [("1", "2", "3")]


def _splits_for(word):
    """Return every (pattern, parts) option for cutting ``word`` into 2 or 3 pieces.

    Two-piece cuts come first (each cut point paired with every pattern in
    PATTERNS_2), followed by three-piece cuts (each pair of cut points paired
    with every pattern in PATTERNS_3).  All pieces are non-empty because cut
    points range over 1 .. len(word) - 1.
    """
    cuts = range(1, len(word))
    two_piece = [
        (pat, (word[:i], word[i:]))
        for i in cuts
        for pat in PATTERNS_2
    ]
    three_piece = [
        (pat, (word[:i], word[i:j], word[j:]))
        for i, j in combinations(cuts, 2)
        for pat in PATTERNS_3
    ]
    return two_piece + three_piece


# month -> list of (pattern, parts) options, in generation order
SPLITS = {m: _splits_for(m) for m in MONTHS}

# 2 DFS with an adjustable bucket-size limit
def solve(limit, months=None, splits=None):
    """Depth-first search for one split per word with every bucket capped at ``limit``.

    Args:
        limit: Maximum number of distinct pieces allowed in each bucket.
        months: Words to place. Defaults to the module-level ``MONTHS``.
        splits: Mapping word -> list of ``(pattern, parts)`` options, where
            ``pattern`` names the bucket for each piece in ``parts``.
            Defaults to the module-level ``SPLITS``.  Each pattern is
            expected to name distinct buckets (true for the generated SPLITS).

    Returns:
        A tuple ``(found, buckets, chosen)``.  ``found`` is True when every
        word was placed.  ``buckets`` maps "1"/"2"/"3" to the set of pieces
        stored there and ``chosen`` maps each word to its selected
        ``(pattern, parts)``; both are empty when ``found`` is False because
        the search undoes every tentative placement on backtrack.
    """
    # ``is None`` (not ``or``) so an explicitly passed empty collection is honoured.
    months = MONTHS if months is None else months
    splits = SPLITS if splits is None else splits

    # Longest words are placed first; sorted() is stable, so ties keep input order.
    order = sorted(months, key=len, reverse=True)
    buckets = {"1": set(), "2": set(), "3": set()}
    chosen = {}

    def dfs(k):
        if k == len(order):
            return True
        word = order[k]

        # Work out, once per option, which pieces would be new to their bucket,
        # and drop options that would push a bucket past the limit.
        feasible = []
        for pattern, parts in splits[word]:
            added = [
                (b, p) for b, p in zip(pattern, parts) if p not in buckets[b]
            ]
            if all(len(buckets[b]) < limit for b, _ in added):
                feasible.append((added, pattern, parts))

        # Try the options that reuse the most existing pieces first.
        # list.sort is stable, so equal-cost options keep generation order.
        feasible.sort(key=lambda option: len(option[0]))

        for added, pattern, parts in feasible:
            for b, p in added:
                buckets[b].add(p)
            chosen[word] = (pattern, parts)
            if dfs(k + 1):
                return True
            # Backtrack: restore buckets and chosen to their state on entry.
            for b, p in added:
                buckets[b].remove(p)
            del chosen[word]
        return False

    found = dfs(0)
    return found, buckets, chosen

# Bucket-size limits to try, smallest first, so the first success is the optimum.
LIMITS = range(2, 13)

print("Computing optimal bucket limit with tqdm progress bar...")
for LIM in tqdm(LIMITS, desc="Scanning bucket size limits"):
    iter_start = time.time()
    ok, B, CH = solve(LIM)
    iter_duration = time.time() - iter_start
    total_elapsed = time.time() - t0_total
    # Limits completed so far, including this one.
    trials = LIM - LIMITS.start + 1
    avg_per = total_elapsed / trials
    # Limits still untried after this one (zero once the last limit has run).
    # Rough estimate only: it assumes each remaining limit costs the average so far.
    remaining = (LIMITS.stop - 1 - LIM) * avg_per
    tqdm.write(
        f"Limit={LIM:2d} | this iter: {iter_duration:.2f}s"
        f" | total: {total_elapsed:.2f}s"
        f" | est remain: {remaining:.2f}s"
    )
    if ok:
        break
else:
    # No limit succeeded: fail loudly instead of reporting a bogus optimum
    # and then hitting a KeyError on the empty assignment below.
    raise RuntimeError(
        f"no valid assignment found for bucket-size limits "
        f"{LIMITS.start}..{LIMITS.stop - 1}"
    )

print(f"\n‼ optimal max-bucket size = {LIM}\n")
for i in "123":
    print(f"Bucket {i} ({len(B[i])} entries): {sorted(B[i])}")
print()
print("Month → chosen pieces:")
for m in MONTHS:
    pat, parts = CH[m]
    print(f"{m:<9}  {' → '.join(parts)}   ({'→'.join(pat)})")

total_time = time.time() - t0_total
print(f"\nTotal elapsed time: {total_time:.2f}s")

Computing optimal bucket limit with tqdm progress bar...


Scanning bucket size limits:   0%|          | 0/11 [00:00<?, ?it/s]

Limit= 2 | this iter: 0.55s | total: 0.58s | est remain: 6.39s
Limit= 3 | this iter: 48.04s | total: 48.62s | est remain: 243.11s


KeyboardInterrupt: 