In [None]:
import math
from tqdm import tqdm

In [None]:
# Puzzle input: a single comma-separated list of inclusive "low-high" id ranges.
filename = "sample.txt"
# filename = "input.txt"
with open(filename, encoding="utf-8") as f:
    data = f.read()

# Split into "low-high" entries, then turn each entry into an (int, int) tuple.
lines = data.strip().split(",")
ranges = [tuple(int(bound) for bound in entry.split("-")) for entry in lines]

In [None]:
# Part 1
def leading_half(n) -> str:
    """Return the leading half of the decimal digits of ``n`` as a string.

    For an odd number of digits the middle digit is included, i.e. the
    prefix length is the digit count divided by two, rounded up.
    """
    digits = str(n)
    # (len + 1) // 2 is the integer form of ceil(len / 2).
    prefix_length = (len(digits) + 1) // 2
    return digits[:prefix_length]

def between(a, b, n):
    """Return True when ``n`` lies in the closed interval [a, b]."""
    return a <= n <= b

In [None]:
# Part 1: collect every id inside a range that is some digit string written twice.
# Instead of scanning the whole range, enumerate only the possible "halves" i and
# test the doubled number int(str(i) * 2).
invalid_ids = set()
for a, b in tqdm(ranges):
    a_digits, b_digits = str(a), str(b)
    a_half_len, b_half_len = len(a_digits) // 2, len(b_digits) // 2

    # Smallest half whose doubling can be >= a.
    if len(a_digits) % 2 == 0:
        start = int(a_digits[:a_half_len])
    else:
        # No doubled number has an odd digit count, so the first candidate is
        # the smallest one with one more digit (e.g. a=998 -> half 10 -> 1010).
        start = 10 ** a_half_len

    # Largest half whose doubling can be <= b.
    if len(b_digits) % 2 == 0:
        end = int(b_digits[:b_half_len])
    else:
        # Likewise the last candidate is the largest doubled number with one
        # digit fewer (e.g. b=115 -> half 9 -> 99).
        end = 10 ** b_half_len - 1

    # Doubled numbers grow monotonically with their half, so one contiguous
    # range of halves covers every digit length between a and b.
    for i in range(start, end + 1):
        potential = int(str(i) * 2)
        # The bounds above are prefixes only, so the exact check is still needed.
        if between(a, b, potential):
            invalid_ids.add(potential)

sum(invalid_ids)

In [None]:
# # Part 2
# # digits repeat at least twice
# # def substrings(s: str):
# #     l = len(s)
# #     for i in range(1, l + 1):
# #         yield(s[:i])

# def half_substrings(s: str):
#     l = math.ceil(len(s) / 2)
#     # for i in range(1, l + 1):
#     #     yield(s[:i])
#     return [s[:i] for i in range(1, l + 1)]

In [None]:
# # Doesn't work: Doesn't produce 111 for range 95-115
# invalid_ids = set()
# for a, b in tqdm(ranges):
#     max_digits = len(str(b))
#     search_stubs_a = list(map(int, half_substrings(str(a))))
#     search_stubs_b = list(map(int, half_substrings(str(b))))
#     # print(search_stubs_a, search_stubs_b)
#     start = min(search_stubs_a)
#     end = max(search_stubs_b)
#     for i in range(start, end + 1):
#         # This can create very long strings, so stop early when length > max_digits
#         potentials = (int(str(i) * n) for n in range(1, max_digits + 1))
#         for p in potentials:
#             if p > b:
#                 break
#             if between(a, b, int(p)):
#                 invalid_ids.add(int(p))

# invalid_ids
# # sum(invalid_ids)

In [None]:
## Part 2 - Brute force approach
# Takes about 3 sec
import re

# Matches a decimal string made of one digit group repeated two or more times.
pattern = re.compile(r"^(\d+)\1+$")
invalid_ids = set()
for a, b in tqdm(ranges, total=len(ranges)):
    # Test every id in the inclusive range and keep the ones that match.
    invalid_ids.update(
        candidate
        for candidate in range(a, b + 1)
        if pattern.fullmatch(str(candidate))
    )

sum(invalid_ids)

In [None]:
## Part 2 - Generative approach
# Alternate approach for fun: generate all invalid ids
def seq(n: int) -> list[int]:
    """Return the integers 0, 1, ..., 10**n - 1 as a list, in ascending order."""
    upper_bound = 10 ** n
    return [value for value in range(upper_bound)]

all_invalid_ids = set()
# Longest id that can occur in the input, derived from the upper bounds of the
# ranges so the cell works for any input file (0 when there are no ranges).
max_digits = max((len(str(upper)) for _lower, upper in ranges), default=0)
for n in tqdm(range(2, max_digits + 1)):
    for seq_length in range(1, n // 2 + 1):
        # n must be divisible by sequence length
        if n % seq_length:
            continue
        copies = n // seq_length
        # Only seeds with exactly seq_length digits produce n-digit ids.
        # Shorter seeds would merely regenerate ids already covered at a
        # smaller n, and seed 0 would add the bogus id 0 ("00..." -> 0).
        smallest_seed = 10 ** (seq_length - 1)
        for s in seq(seq_length):
            if s < smallest_seed:
                continue
            candidate = str(s) * copies
            all_invalid_ids.add(int(candidate))

len(all_invalid_ids)

In [None]:
# Keep only the generated ids that fall inside at least one input range.
# `in` on a range object is a constant-time arithmetic check, so no range is expanded.
rs = [range(a, b + 1) for a, b in ranges]
# Filter the generated set (all_invalid_ids), not the earlier brute-force result,
# so this cell does not depend on the brute-force cell having been run.
invalid_ids = {candidate for candidate in tqdm(all_invalid_ids) if any(candidate in r for r in rs)}
sum(invalid_ids)

In [None]:
# Another alternative: Create a set with all ids in the ranges, then set intersection with all_invalid_ids
# Creating id_ranges_set should be slower than the generative approach above
id_ranges_set = set()
for a, b in ranges:
    # Expand the inclusive range a..b into individual ids.
    id_ranges_set.update(range(a, b + 1))
# len(id_ranges_set)

# Intersect with the full generated set; intersecting the already-filtered
# invalid_ids would be a no-op and would not exercise this approach.
sum(all_invalid_ids & id_ranges_set)