# Advent of Code - Day1:

### Things learned:

- Triple quotation marks are used to create multi-line strings.
- You can use `inplace=True` to assign when sorting a pandas dataframe but not the columns of a dataframe.

### Things to remember:

- List comprehension syntax `newlist = [expression for item in iterable if condition == True]`

### Input:

In [1]:
def read_file(file_path = "../inputs/day1.txt"):
    # Read the file and split lines
    with open(file_path, "r") as file:
        return file.read()

### Solution:

In [2]:
def total_distance(input_data):
    lines = input_data.strip().split("\n")
    # Read and format into to two lists
    column1, column2 = [], []
    for line in lines:
        a, b = line.split()
        column1.append(int(a))
        column2.append(int(b))
    
    # Sort the columns
    column1.sort()
    column2.sort()
    
    # Calculate and return total sum
    sum = 0
    for i in range(len(column1)):
        sum += abs(column1[i] - column2[i])
            
    return (sum)

In [3]:
input_data = read_file()
total_distance(input_data)

1938424

###  Pandas Solution:

In [4]:
import pandas as pd

In [5]:
def total_distance(input_data):
    # Read and format into to a list of lists of two ints
    lines = input_data.strip().split("\n")
    data = [list(map(int, line.split())) for line in lines]
    df = pd.DataFrame(data, columns=['column1', 'column2'])
    
    # Sort the columns
    df['column1'] = df['column1'].sort_values()
    df['column2'] = df['column2'].sort_values()
    
    sum = (df['column1'] - df['column2']).abs().sum()
    return sum

def total_similarity(input_data):
    # Read and format into to a list of lists of two ints
    lines = input_data.strip().split("\n")
    data = [list(map(int, line.split())) for line in lines]
    df = pd.DataFrame(data, columns=['column1', 'column2'])
    
    # Find similarity
    df['similarity'] = df['column1'].map(df['column2'].value_counts())*df['column1']
    sum = df['similarity'].sum()
    return sum

In [6]:
input_data = read_file()
total_distance(input_data)

30675430

In [7]:
input_data = read_file()
total_similarity(input_data)

22014209.0