In [94]:
import numpy as np
import pandas as pd
from scipy.spatial import distance
import itertools
import aocd

In [13]:
test_data = """162,817,812
57,618,57
906,360,560
592,479,940
352,342,300t
466,668,158
542,29,236
431,825,988
739,650,466
52,470,668
216,146,977
819,987,18
117,168,530
805,96,715
346,949,466
970,615,88
941,993,340
862,61,35
984,92,344
425,690,689"""

In [41]:
def parse_data(data):
    return np.array([[int(x) for x in line.split(",")] for line in data.split("\n")])
    

In [193]:
def create_dists_df(array):
    distance_matrix = distance.pdist(array, metric="euclidean")
    indices = itertools.combinations(range(len(array)), 2)
    df = pd.DataFrame(data = {"dists": distance_matrix, "indices": indices})
    return df

In [194]:
def solve_part_1(data, n_nodes = 10):
    data = parse_data(data)
    df = create_dists_df(data)
    df = df.sort_values("dists").iloc[:n_nodes]
    G = nx.Graph()
    G.add_edges_from(df.indices.to_list())
    connected_components = [len(c) for c in nx.connected_components(G)]
    connected_components.sort()
    prod = connected_components[-1] * connected_components[-2] * connected_components[-3]
    return prod

In [195]:
solve_part_1(test_data)

40

In [196]:
data = aocd.get_data()

In [197]:
solve_part_1(data, n_nodes=1000)

24360

In [198]:
def solve_part_2(data, n_nodes = 10):
    array = parse_data(data)
    df = create_dists_df(array)
    df = df.sort_values("dists")
    G = nx.Graph()
    G.add_edges_from(df.indices.iloc[:n_nodes].to_list())
    connected_components = nx.number_connected_components(G)
    n = n_nodes
    while connected_components > 1:
        n+=1
        next_edge = df.indices.iloc[n]
        G.add_edge(*next_edge)
        connected_components = nx.number_connected_components(G)
        print(
            f"Added edge {next_edge} between {array[next_edge[0], :]} and {array[next_edge[1], :]}."
            f" n_circuits = {connected_components}"
            )      
    print(f"All nodes connected when {df.indices.iloc[n]} were connected")
    node_n = df.iloc[n].indices[0]
    node_n1 = df.iloc[n].indices[1]
    x1 = array[node_n, :]
    x2 = array[node_n1, :]
    print(x1, x2)
    return prod

In [241]:
len(G.nodes)

838

In [251]:
def solve_part_2(data, n_nodes=10, debug = True):
    array = parse_data(data)
    df = create_dists_df(array)
    df = df.sort_values("dists")

    G = nx.Graph() #set up the graph
    n = 0
    n_components = len(array) #start somewhere

    while True:
        next_edge = df.indices.iloc[n] #add the next edge
        G.add_edge(*next_edge)
        n += 1
        n_components = nx.number_connected_components(G) #get number of connected components
        n_nodes_total = G.number_of_nodes() #get number of total nodes in graph

        if debug:
            print(
                f"Added edge {next_edge} between {array[next_edge[0]]} and {array[next_edge[1]]}."
                f" Distance was {df.dists.iloc[n]}"
                f" n_components = {n_components}"
                f" n_nodes = {n_nodes_total}"
            )
                    
        if n_components == 1 and n_nodes_total == len(array): #if we have only one connected component, and used all the nodes
            break


    last_edge = df.indices.iloc[n-1]
    print(f"All nodes connected when {last_edge} was added")

    # If you want to inspect:
    u, v = last_edge
    print(array[u], array[v])

    return array[u][0] * array[v][0]  # or whatever "prod" was intended to be

In [253]:
solve_part_2(test_data, debug=True)

Added edge (0, 19) between [162 817 812] and [425 690 689]. Distance was 321.560258738545 n_components = 1 n_nodes = 2
Added edge (0, 7) between [162 817 812] and [431 825 988]. Distance was 322.36935338211043 n_components = 1 n_nodes = 3
Added edge (2, 13) between [906 360 560] and [805  96 715]. Distance was 328.11888089532425 n_components = 2 n_nodes = 5
Added edge (7, 19) between [431 825 988] and [425 690 689]. Distance was 333.6555109690233 n_components = 2 n_nodes = 5
Added edge (17, 18) between [862  61  35] and [984  92 344]. Distance was 338.33858780813046 n_components = 3 n_nodes = 7
Added edge (9, 12) between [ 52 470 668] and [117 168 530]. Distance was 344.3893145845266 n_components = 4 n_nodes = 9
Added edge (11, 16) between [819 987  18] and [941 993 340]. Distance was 347.59890678769403 n_components = 5 n_nodes = 11
Added edge (2, 8) between [906 360 560] and [739 650 466]. Distance was 350.786259708102 n_components = 5 n_nodes = 12
Added edge (14, 19) between [346 949

np.int64(25272)

In [252]:
solve_part_2(data, debug = False)

All nodes connected when (316, 463) was added
[53148 96207  9295] [41127 88410 10248]


np.int64(2185817796)