# Day 14 - Extended Polymerization

https://adventofcode.com/2021/day/14

In [29]:
from pathlib import Path

# Puzzle input layout: line 0 is the polymer template, line 1 is a separator,
# and every following line is an insertion rule written as "AB -> C".
INPUTS = Path("input.txt").read_text().strip().split("\n")
POLY_TEMPLATE = INPUTS[0]
# Map each pair (left of the arrow) to the text it inserts (right of the arrow).
PAIR_INSERT_RULES = dict(line.split(" -> ") for line in INPUTS[2:])


In [30]:
def insertion_step(template: str, rules: "dict[str, str] | None" = None) -> str:
    """Apply one round of pair insertion to a polymer string.

    Every adjacent pair of characters in ``template`` is looked up in
    ``rules``; when a rule exists, its value is inserted between the two
    characters. All lookups are made against the incoming ``template``, so
    text inserted during this round never forms a new pair within the same
    round.

    Args:
        template: The current polymer. An empty string yields an empty string.
        rules: Mapping of a two-character pair to the text inserted between
            its characters. When ``None`` (the default), the module-level
            ``PAIR_INSERT_RULES`` is used, looked up at call time so that
            re-parsing the input is picked up without redefining this
            function.

    Returns:
        The polymer after one insertion round.
    """
    if rules is None:
        rules = PAIR_INSERT_RULES
    if not template:
        # Nothing to expand; also avoids indexing into an empty string.
        return ""

    # Collect the pieces and join once instead of growing a string with +=.
    pieces = [template[0]]
    for left, right in zip(template, template[1:]):
        # A pair without a rule contributes no inserted text.
        pieces.append(rules.get(left + right, ""))
        pieces.append(right)
    return "".join(pieces)


def test_insertion():
    """Check insertion_step against four successive rounds of a worked example.

    Starting from the template "NNCB", each round's output is fed back in as
    the next round's input and compared with the expected polymer.
    """
    example_rules = {
        "CH": "B",
        "HH": "N",
        "CB": "H",
        "NH": "C",
        "HB": "C",
        "HC": "B",
        "HN": "C",
        "NN": "C",
        "BH": "H",
        "NC": "B",
        "NB": "B",
        "BN": "B",
        "BB": "N",
        "BC": "B",
        "CC": "N",
        "CN": "C",
    }
    # Expected polymer after rounds 1 through 4, in order.
    expected_rounds = (
        "NCNBCHB",
        "NBCCNBBBCBHCB",
        "NBBBCNCCNBBNBNBBCHBHHBCHB",
        "NBBNBNBBCCNBCNCCNBBNBBNBBBNBBNBBCBHCBHHNHCBBCBHCB",
    )
    current = "NNCB"
    for expected in expected_rounds:
        current = insertion_step(template=current, rules=example_rules)
        assert current == expected


test_insertion()


In [31]:
# Part 1: expand the template through ten insertion rounds.
polymer = POLY_TEMPLATE
for _ in range(10):
    polymer = insertion_step(template=polymer)

# Pair each distinct character with its occurrence count; sorting the
# (count, char) tuples orders them from least to most common.
chars = set(polymer)
counts = sorted((polymer.count(char), char) for char in chars)
least_common, most_common = counts[0], counts[-1]
print(f"{least_common=}; {most_common=}")

# The puzzle answer is the gap between the two extreme counts.
diff = most_common[0] - least_common[0]
print(f"Difference: {diff}")

least_common=(530, 'S'); most_common=(3505, 'C')
Difference: 2975


## Part 2

I admit I ducked into the spoilers room on Python Discord for some direction here, as originally I just tried running my Part 1 method unchanged for 40 iterations. Silly me forgot that this was an exponential growth problem: the polymer roughly doubles in length on every step, so the calculation would start taking several minutes per iteration around the 21st step. If I had let it run until the 40th, it might have kept going for a month and blown up my laptop.

The tip I took away was one I really should have thought of before, as I had on Day 6. Despite the arrangement of the problem on the site, the real solution doesn't require tracking each individual character or pair at its position in the chain. Instead, we can just build a mapping of pairs to the number of times those pairs exist in the chain.

The trick is that each pair separates into a new set of pairs on each iteration, so the original is not preserved. For a rule of `FP -> N`, for example, the pair `FP` would separate into pairs `FN` and `NP`.

With that in mind, generating the set of pairs in the polymer is as simple as preserving the *number* of each unique pair. The easiest way to achieve that, I find, is a `defaultdict(int)`.

In [32]:
from collections import defaultdict


def insertion_step2(
    pair_counts: dict[str, int],
    rules: "dict[str, str] | None" = None,
) -> dict[str, int]:
    """Apply one insertion round to a polymer stored as pair counts.

    A pair ``AB`` with rule ``AB -> C`` is replaced by the pairs ``AC`` and
    ``CB``, each inheriting the full count of ``AB``. A pair with no rule is
    carried over unchanged.

    Args:
        pair_counts: Mapping of each adjacent pair to how many times it
            occurs in the polymer. It is not modified.
        rules: Mapping of a two-character pair to the text inserted between
            its characters. When ``None`` (the default), the module-level
            ``PAIR_INSERT_RULES`` is used, looked up at call time.

    Returns:
        A new ``defaultdict(int)`` holding the pair counts after the round.
    """
    if rules is None:
        rules = PAIR_INSERT_RULES

    # Build into a fresh mapping so the counts being read are never mutated.
    new_pair_counts = defaultdict(int)
    for pair, num in pair_counts.items():
        insertion = rules.get(pair)
        if insertion is None:
            # No rule for this pair: it survives the round as-is.
            new_pair_counts[pair] += num
            continue
        # The pair splits into two pairs around the inserted text.
        left, right = pair
        new_pair_counts[left + insertion] += num
        new_pair_counts[insertion + right] += num
    return new_pair_counts


To start off, the original `POLY_TEMPLATE` also needs to be re-processed into an initial set of pair counts. We do this by iterating through indices and pulling slices into the initial defaultdict:

In [33]:
# Count every adjacent two-character pair of the template by walking the
# template alongside a copy of itself shifted by one character.
pair_counts = defaultdict(int)
for first, second in zip(POLY_TEMPLATE, POLY_TEMPLATE[1:]):
    pair_counts[first + second] += 1


From there, we can run our calculation, which takes milliseconds again instead of minutes.

In [34]:
# Run the pair-count insertion step ITERATIONS times, feeding each round's
# result back in as the next round's input.
ITERATIONS = 40
for _step in range(ITERATIONS):
    pair_counts = insertion_step2(pair_counts)


Finally, the character count to find most and least common.

Two facts are important to keep in mind when counting with this method:

1. Adjacent pairs overlap: every character except the very first and very last of the polymer belongs to two pairs, as the second character of one pair and the first character of the next. Counting both characters of every pair would therefore double-count, so we count only **one** character per pair.
2. Both the **first** and **last** characters of the final polymer will always be the same as the first and last of the initial template.

Given this, I chose to consider the **first** character of the template along with every **last** character of the final pairs, in order to count unique characters throughout the polymer.

With that in hand, we just sum the values of the pair counts.

In [35]:
char_counts = defaultdict(int)
# The template's first character is never the second half of any pair,
# so it is seeded into the tally by hand.
char_counts[POLY_TEMPLATE[0]] += 1
# Every other character is the second character of exactly one pair
# occurrence, so tallying second characters counts each one once.
for pair, count in pair_counts.items():
    second_char = pair[1]
    char_counts[second_char] += count

# (count, char) tuples sort by count first, i.e. least to most common.
sorted_chars = sorted(zip(char_counts.values(), char_counts.keys()))

# The two ends of the sorted list are the extremes we need.
least_common = sorted_chars[0]
most_common = sorted_chars[-1]
print(f"{least_common=}, {most_common=}")

# The puzzle answer is the gap between the two extreme counts.
diff = most_common[0] - least_common[0]
print(f"Difference: {diff}")


least_common=(594452039903, 'S'), most_common=(3609835890592, 'C')
Difference: 3015383850689
