In [44]:
day=14

def read_data(filename):
    f = open(filename)
    parse = lambda l : l.strip().split(' -> ')
    chain = f.readline().strip()
    f.readline()
    polymers = {parse(line)[0]:parse(line)[1] for line in f.readlines()}
    f.close()
    return (chain,polymers)

def polymerize(pairs, polymers):
    polymerized = {}
    def add(element, count):
        if element in polymerized:
            polymerized[element] += count
        else:
            polymerized[element] = count

    for key,value in pairs.items():
        insertion = polymers[key]
        left = key[0] + insertion
        right = insertion + key[1]

        add(left, value)
        add(right, value)

    return polymerized

def determine_elements(polymers):
    elements = set()
    def add(element):
        if not element in elements:
            elements.add(element)

    for key in polymers.keys():
        add(key[0])
        add(key[1])

    return elements

def determine_difference(data, steps):
    chain, polymers= data

    pairs = {}

    for i in range(1, len(chain)):
        pair = chain[i-1:i+1]

        if pair in pairs:
            pairs[pair] += 1
        else:
            pairs[pair] = 1

    elements = determine_elements(polymers)
    print(f'\telements: {elements}')

    for step in range(0,steps):
        pairs = polymerize(pairs, polymers)
        #print(f'Step {step} {sum([value for _,value in pairs.items()])} pairs')

    occurences = {}
    for key, value in pairs.items():
        if key[0] in occurences:
            occurences[key[0]] += value
        else:
            occurences[key[0]] = value
        if key[1] in occurences:
            occurences[key[1]] += value
        else :
            occurences[key[1]] = value
    for e in chain:
        occurences[e] += 1
    frequency = [(key,value//2) for key, value in occurences.items()]
    frequency.sort(key=lambda t : t[1])
    print(f'\tFrequency analysis: {frequency}')
    return frequency[-1][1] - frequency[0][1]

sample = read_data(f'day{day}.sample.dat')

chain, polymers= sample
difference = determine_elements(polymers)

print(f'[SAMPLE]')
difference = determine_difference(sample, 10)
print(f'After 10 steps the difference was {difference}')
if difference != 1588:
    raise ValueError(f'At step 10 expected a difference of 1588 but was {difference}')

input = read_data(f'day{day}.dat')
print('INPUT')
print(input)

difference = determine_difference(input, 10)
print(f'After 10 steps the difference was {difference}')
if difference != 2112:
    raise ValueError(f'Expected 2112, drifted to {difference}')

[SAMPLE]
	elements: {'N', 'H', 'B', 'C'}
	Frequency analysis: [('H', 161), ('C', 298), ('N', 865), ('B', 1749)]
After 10 steps the difference was 1588
INPUT
('SCSCSKKVVBKVFKSCCSOV', {'CP': 'C', 'SF': 'S', 'BH': 'F', 'SS': 'N', 'KB': 'N', 'NO': 'N', 'BP': 'F', 'NK': 'P', 'VP': 'H', 'OF': 'O', 'VH': 'O', 'FV': 'F', 'OP': 'V', 'FP': 'B', 'VB': 'B', 'OK': 'S', 'BS': 'B', 'SK': 'P', 'VV': 'H', 'PC': 'S', 'HV': 'K', 'PS': 'N', 'VS': 'O', 'HF': 'B', 'SV': 'C', 'HP': 'O', 'NF': 'V', 'HB': 'F', 'VO': 'B', 'VN': 'N', 'ON': 'H', 'KV': 'K', 'OV': 'F', 'HO': 'H', 'NB': 'K', 'CB': 'F', 'FF': 'H', 'NH': 'F', 'SN': 'N', 'PO': 'O', 'PH': 'C', 'HH': 'P', 'KF': 'N', 'OH': 'N', 'KS': 'O', 'FH': 'H', 'CC': 'F', 'CK': 'N', 'FC': 'F', 'CF': 'H', 'HN': 'B', 'OC': 'F', 'OB': 'K', 'FO': 'P', 'KP': 'N', 'NC': 'P', 'PN': 'O', 'PV': 'B', 'CO': 'C', 'CS': 'P', 'PP': 'V', 'FN': 'B', 'PK': 'C', 'VK': 'S', 'HS': 'P', 'OS': 'N', 'NP': 'K', 'SB': 'F', 'OO': 'F', 'CV': 'V', 'BB': 'O', 'SH': 'O', 'NV': 'N', 'BN': 'C', 'KN

ValueError: Expected 2112, drifted to 2111

## --- Part Two ---
The resulting polymer isn't nearly strong enough to reinforce the submarine. You'll need to run more steps of the pair insertion process; a total of 40 steps should do it.

In the above example, the most common element is B (occurring 2192039569602 times) and the least common element is H (occurring 3849876073 times); subtracting these produces 2188189693529.

Apply 40 steps of pair insertion to the polymer template and find the most and least common elements in the result. What do you get if you take the quantity of the most common element and subtract the quantity of the least common element?

In [43]:
difference = determine_difference(sample, 40)
print('SAMPLE')
print(f'After 40 steps the chain was had a difference of {difference}')
if difference != 2188189693529:
    raise ValueError(f'Expected a difference of 2188189693529 but was {difference}')


difference = determine_difference(input, 40)
print('INPUT')
print(f'After 40 steps the chain was had a difference of {difference}')
print(f'Off by one error, corrected difference is {difference+1}')

	elements: {'N', 'H', 'B', 'C'}
	Frequency analysis: [('H', 7699752146), ('C', 13195270603), ('N', 2192095604707), ('B', 4384079139204)]
SAMPLE
After 40 steps the chain was had a difference of 2188189693529
	elements: {'K', 'P', 'S', 'N', 'O', 'F', 'B', 'C', 'V', 'H'}
	Frequency analysis: [('V', 1148044499235), ('K', 2595592662598), ('S', 2979975943802), ('C', 3461297536896), ('B', 4064137581221), ('H', 4431423403000), ('F', 4948148083993), ('N', 5170175931794), ('P', 5347059413908), ('O', 7635586799061)]
INPUT
After 40 steps the chain was had a difference of 3243771149913
Off by one error, corrected difference is 3243771149914
