In [16]:
import pandas as pd
import profiler
from typing import List

# Part 1

### Overview

The goal is to write a program that calculates the total distance between the two lists in the input file.

The distance is defined as the sum of the absolute differences between the $i$-th smallest elements of the two lists.

### Approach

Sort the lists, take the differences elementwise, and then sum the absolute values of the differences to get the final distance. Use pandas for vectorization to speed up the calculation on larger inputs.

In [7]:
# Pull in the data: each input line becomes a list of whitespace-separated string tokens.
datafile = "../../2024/data/day1_input.txt"
# Read-only mode is sufficient; nothing is written back to the input file.
with open(datafile, 'r') as f:
    lines = f.readlines()
# str.split() with no arguments already ignores leading/trailing whitespace.
# Blank lines are skipped so they do not turn into empty rows downstream.
data = [x.split() for x in lines if x.strip()]


In [15]:
@profiler.profile
def part1(data: List) -> int:
    """Return the total distance between the two columns of ``data``.

    Both columns are converted to integers and sorted independently, then
    paired position by position; the absolute differences of the pairs are
    summed.

    Args:
        data: Rows of string tokens; columns 0 and 1 must be convertible
            to ``int``.

    Returns:
        The sum of absolute differences between the sorted columns.
    """
    frame = pd.DataFrame(data).astype(int)
    # Sorting each column and taking the raw array discards the original row
    # index, so the subtraction below pairs values by rank, not by input row.
    ordered = frame.apply(lambda col: col.sort_values().to_numpy())
    gaps = ordered[0] - ordered[1]
    return gaps.abs().sum()

In [17]:
# Run Part 1 on the parsed input; the cell's last expression is displayed as the answer.
part1(data)

Calling part1: Memory used 6377472 kB; Execution Time: 0.017478416208177805 s


765748

# Part 2

### Overview 

Calculate a total similarity score by adding up each number in the left list after multiplying it by the number of times that number appears in the right list.

### Approach

Count the number of occurrences of each left-list number in the right list, multiply each left number by its count, and sum the products.

In [64]:
@profiler.profile
def part2(data: List) -> int:
    """Return the similarity score between the left and right columns.

    Every entry of the left column is multiplied by the number of times that
    value occurs in the right column, and the products are summed. A value
    repeated in the left column contributes once per repetition.

    Args:
        data: Rows of string tokens; columns 0 (left) and 1 (right) must be
            convertible to ``int``.

    Returns:
        The total similarity score as a plain ``int`` (0 for empty input).
    """
    # Convert data to DataFrame and convert values to int
    df = pd.DataFrame(data).astype(int)
    if df.empty:
        return 0
    left = df[0].to_numpy()
    # Number of occurrences of each distinct value in the right column.
    right_counts = df[1].value_counts()
    # Look up the right-hand count for every left entry, duplicates included.
    # Left values that never appear on the right get a count of 0 and so
    # contribute nothing to the score.
    matches = right_counts.reindex(left, fill_value=0).to_numpy()
    # Multiply each left number by its count and sum for the total score.
    return int((left * matches).sum())


Calling part2: Memory used 16384 kB; Execution Time: 0.0033610002137720585 s


27732508

In [None]:
# Run Part 2 on the parsed input; the cell's last expression is displayed as the answer.
part2(data)

In [36]:
# Spot check: is 15292 present in the left column?
# The column is rebuilt from `data` so the cell does not depend on a `df`
# variable left over in the kernel from an earlier session.
15292 in pd.DataFrame(data)[0].astype(int).values

True