# day 8

https://adventofcode.com/2025/day/8

In [None]:
import collections
import logging
import logging.config
import os

import yaml
from scipy.spatial.distance import pdist

In [None]:
# Load the logging configuration that lives one directory above this notebook
# and hand it to the standard logging machinery.
# NOTE(review): yaml.FullLoader can build more than plain data types; if the
# config only uses standard YAML, yaml.safe_load would be enough - confirm.
with open('../logging.yaml') as config_stream:
    logging_config = yaml.load(config_stream, Loader=yaml.FullLoader)

logging.config.dictConfig(logging_config)

In [None]:
# Logger dedicated to this day's puzzle.
LOGGER = logging.getLogger('day08')

# Default location of the puzzle input, relative to the working directory.
FNAME = os.path.join('data', 'day08.txt')

## part 1

### problem statement:

#### loading data

In [None]:
# Sample puzzle input: 20 lines, each holding three comma-separated integers.
test_data = """162,817,812
57,618,57
906,360,560
592,479,940
352,342,300
466,668,158
542,29,236
431,825,988
739,650,466
52,470,668
216,146,977
819,987,18
117,168,530
805,96,715
346,949,466
970,615,88
941,993,340
862,61,35
984,92,344
425,690,689"""

In [None]:
def load_data(fname=FNAME):
    """Return the entire text content of the file at ``fname``.

    ``fname`` defaults to the module-level ``FNAME`` path.
    """
    with open(fname) as handle:
        contents = handle.read()
    return contents

In [None]:
import io

import numpy as np
import pandas as pd

def parse_raw_data(data: str) -> np.ndarray:
    """Parse header-less comma-separated text into a 2-D array.

    Each line of ``data`` becomes one row and each comma-separated field one
    column of the returned array.
    """
    buffer = io.StringIO(data)
    frame = pd.read_csv(buffer, header=None)
    return frame.values

In [None]:
# Quick look at the parser's output for the sample input.
parse_raw_data(data=test_data)

#### function def

In [None]:
import collections
from scipy.cluster.hierarchy import fcluster, linkage


def build_clusters(a: np.ndarray, num_links: int = 10) -> np.ndarray:
    """Label each point with the cluster it joins after linking close pairs.

    The ``num_links`` closest pairs of rows in ``a`` (Euclidean distance) are
    treated as connected; the returned labels identify the resulting
    connected components.  A link between two points that are already in the
    same component still counts towards ``num_links`` but merges nothing.

    Parameters
    ----------
    a:
        2-D array with one point per row.
    num_links:
        Number of closest pairs to connect.  Values ``<= 0`` leave every
        point in its own cluster; values larger than the number of pairs
        connect everything.

    Returns
    -------
    np.ndarray
        1-D array of integer cluster labels (starting at 1), one per row
        of ``a``.
    """
    p = pdist(a, metric='euclidean')
    if num_links <= 0 or p.size == 0:
        # Nothing to connect: every point is its own cluster.
        return np.arange(1, a.shape[0] + 1, dtype=np.int32)

    # Distance of the last pair that still gets connected, i.e. the
    # num_links-th smallest pairwise distance (clamped to the longest pair).
    k = min(num_links, p.size) - 1
    d_thresh = np.partition(p, k)[k]

    # Cutting a single-linkage tree at height d yields the connected
    # components of the graph whose edges are all pairs with distance <= d,
    # which is exactly "connect the num_links closest pairs".
    # NOTE: if several pairs tie exactly at d_thresh they are all connected.
    z = linkage(p, method='single')
    return fcluster(z, t=d_thresh, criterion='distance')

# Cluster labels for the sample input with the default number of links.
build_clusters(a=parse_raw_data(data=test_data))

In [None]:
import math

def q_1(data, num_links: int = 10):
    """Return the product of the sizes of the three most populous clusters.

    ``data`` is the raw comma-separated puzzle text; ``num_links`` is passed
    through to ``build_clusters``.
    """
    points = parse_raw_data(data=data)
    labels = build_clusters(a=points, num_links=num_links)
    cluster_sizes = collections.Counter(labels)
    largest_three = [size for _, size in cluster_sizes.most_common(n=3)]
    return math.prod(largest_three)

#### tests

In [None]:
def test_q_1():
    """Check ``q_1`` against the sample input (expected answer: 40).

    The logger is switched to DEBUG for the duration of the check and set
    back to INFO afterwards, even when the assertion fails.
    """
    LOGGER.setLevel(logging.DEBUG)
    try:
        assert q_1(test_data) == 40, f"{q_1(test_data) = }"
    finally:
        # Always restore the quieter level so a failed check does not leave
        # the rest of the session logging at DEBUG.
        LOGGER.setLevel(logging.INFO)

In [None]:
# Run the part 1 check against the sample input.
test_q_1()

#### answer

In [None]:
# Part 1 answer for the full puzzle input, using 1,000 links.
q_1(load_data(), num_links=1_000)

## part 2

### problem statement:

#### function def

In [None]:
def q_2(data):
    """Return the product of the first coordinates of the final linked pair.

    The points are merged by single linkage; the height of the last merge is
    the distance of the pair whose connection joins everything into a single
    cluster.  That pair is located in the condensed distance vector and the
    first-column values of its two points are multiplied.

    ``data`` is the raw comma-separated puzzle text.
    """
    a = parse_raw_data(data=data)
    p = pdist(a, metric='euclidean')
    # Feed the condensed distances to linkage (as build_clusters does) so the
    # merge heights are taken from ``p`` itself and can be matched exactly.
    z = linkage(p, method='single')
    d = z[-1, 2]

    # Position of the final merge distance in the condensed vector.
    i_pd_last_pair = np.argmax(p == d)

    # np.triu_indices enumerates (i, j) with i < j in the same row-major
    # order pdist uses, so it maps a condensed index back to its point pair.
    rows, cols = np.triu_indices(a.shape[0], k=1)
    i, j = rows[i_pd_last_pair], cols[i_pd_last_pair]
    return a[[i, j], 0].prod()

#### tests

In [None]:
def test_q_2():
    """Check ``q_2`` against the sample input (expected answer: 25,272).

    The logger is switched to DEBUG for the duration of the check and set
    back to INFO afterwards, even when the assertion fails.
    """
    LOGGER.setLevel(logging.DEBUG)
    try:
        assert q_2(test_data) == 25_272
    finally:
        # Always restore the quieter level so a failed check does not leave
        # the rest of the session logging at DEBUG.
        LOGGER.setLevel(logging.INFO)

In [None]:
# Run the part 2 check against the sample input.
test_q_2()

#### answer

In [None]:
# Part 2 answer for the full puzzle input.
q_2(load_data())

fin