In [1]:
import numpy as np
import os
import pandas as pd
import unittest

# Base directory used to build the input-file paths below; because it is the
# current working directory, the notebook has to be started from the folder
# that contains the `inputs/` directory.
directory_path = os.getcwd()

In [2]:
def change_1D_string_to_2D_int(array):
    """
    Convert a sequence of digit strings into a 2D integer array.

    Each string becomes one row and each character one column, e.g.
    ``['00100', '11110']`` -> ``[[0, 0, 1, 0, 0], [1, 1, 1, 1, 0]]``.

    Parameters
    ----------
    array : str or sequence of str
        Digit strings of equal length. A single string (or the 0-d array
        ``np.loadtxt`` returns for a one-line file) is treated as one row.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(n_strings, string_length)``. An empty
        input yields an empty array of shape ``(0, 0)``.

    Raises
    ------
    ValueError
        If a character is not a digit or the strings differ in length.
    """
    # atleast_1d keeps a lone string / 0-d array iterable as a single row.
    rows = np.atleast_1d(np.array(array))
    if rows.size == 0:
        # Nothing to index into, so return an empty matrix instead of failing.
        return np.zeros((0, 0), dtype='int')
    return np.array([[int(char) for char in row] for row in rows], dtype='int')

        
def find_most_frequent_colwise(mat):
    """
    Return the most frequent value in every column of a binary matrix.

    Parameters
    ----------
    mat : array-like of non-negative ints, shape (n_rows, n_cols)
        ``np.bincount`` is used for counting, so the values must be
        non-negative integers.

    Returns
    -------
    numpy.ndarray
        1D integer array with one entry per column. A matrix without
        columns yields an empty integer array.

    Notes
    -----
    ``np.argmax`` returns the first maximum, so when two values are equally
    frequent the smaller one (0 for binary data) is reported.
    """
    mat = np.array(mat)
    # Collect in a list and convert once; np.append would copy on every step.
    most_frequent = [np.argmax(np.bincount(col)) for col in mat.transpose()]
    return np.array(most_frequent, dtype='int')

def convert_bin_array_2_decimal(array):
    """
    Interpret a sequence of bits (most significant bit first) as an integer.

    Parameters
    ----------
    array : array-like
        The bits, e.g. a list, NumPy array or pandas Series of 0/1 values.
        Length-one dimensions are dropped, so a ``(1, n)`` array holding a
        single row is accepted as well.

    Returns
    -------
    int
        The decimal value of the bit string.

    Raises
    ------
    ValueError
        If the input is empty, holds more than one row of bits, or
        contains values other than 0 and 1.
    """
    # Squeeze away singleton axes, then make sure a lone bit is still iterable.
    bits = np.atleast_1d(np.squeeze(np.asarray(array)))
    if bits.ndim != 1:
        raise ValueError(
            "expected a single row of bits, got shape {0}".format(bits.shape)
        )
    # Build the string element by element: np.array2string wraps long lines
    # and abbreviates large arrays, which would corrupt the digits.
    bit_string = "".join(str(int(bit)) for bit in bits)
    return int(bit_string, 2)


def find_most_frequent_colwise_df(df):
    """
    Column-wise mode of a DataFrame, handed back as a plain NumPy array.

    The array is the ``.values`` of ``df.mode()``, so it holds one row per
    mode returned by pandas.
    """
    return df.mode().values


def find_most_frequent_in_array(array):
    """
    Return the most frequent value of a binary array.

    Parameters
    ----------
    array : array-like
        Values to count; intended for arrays holding only 0 and 1.

    Returns
    -------
    int
        The value that occurs most often. When the two smallest distinct
        values are equally frequent (a 0/1 tie for binary input), 1 is
        returned.
    """
    vals, counts = np.unique(array, return_counts=True)
    if len(counts) > 1 and counts[0] == counts[1]:
        # Tie between 0 and 1 is resolved in favour of 1.
        return 1
    # argmax yields a scalar index, so int() receives a scalar rather than a
    # one-element array (converting the latter is deprecated in NumPy).
    return int(vals[np.argmax(counts)])
    
def find_least_frequent_in_array(array):
    """
    Return the least frequent value of a binary array.

    Parameters
    ----------
    array : array-like
        Values to count; intended for arrays holding only 0 and 1.

    Returns
    -------
    int
        The value that occurs least often. When the two smallest distinct
        values are equally frequent (a 0/1 tie for binary input), 0 is
        returned. If only one distinct value is present, that value is
        returned.
    """
    vals, counts = np.unique(array, return_counts=True)
    if len(counts) > 1 and counts[0] == counts[1]:
        # Tie between 0 and 1 is resolved in favour of 0.
        return 0
    # argmin yields a scalar index, so int() receives a scalar rather than a
    # one-element array (converting the latter is deprecated in NumPy).
    return int(vals[np.argmin(counts)])
    
def filter_by_bit_freq(df, keep_most_common=True):
    """
    Narrow a binary DataFrame down column by column.

    Walking the columns left to right, the bit to keep is determined from
    the rows that are still left (most frequent bit when
    ``keep_most_common`` is true, least frequent otherwise) and every row
    with a different bit in that column is dropped. Filtering ends as soon
    as at most one row remains. The surviving rows are returned as a NumPy
    array.
    """
    remaining = df
    for col_idx in range(df.shape[1]):
        # Once a single row (or nothing) is left there is nothing to filter.
        if remaining.shape[0] <= 1:
            break
        bits = remaining.iloc[:, col_idx]
        if keep_most_common:
            wanted_bit = find_most_frequent_in_array(bits)
        else:
            wanted_bit = find_least_frequent_in_array(bits)
        remaining = remaining[bits == wanted_bit]
    return np.array(remaining)
    
        
class Tests(unittest.TestCase):
    # Unit tests for the helper functions defined in this cell.
    # Explanations are written as comments rather than docstrings so that
    # unittest's verbose output (which prints docstring first lines) stays
    # unchanged.

    def test_change_1D_string_to_2D_int(self):
        """
        Test example input.
        """
        a = ['00100']
        np.testing.assert_array_equal(change_1D_string_to_2D_int(a), ([[0, 0, 1, 0, 0]]))

    def test_find_most_frequent_colwise(self):
        # All-zero columns and a constant-one column.
        a = [[0,0,0], [0,0,0]]
        np.testing.assert_array_equal(find_most_frequent_colwise(a), ([0, 0, 0]))

        a = [[0,0,1], [0,0,1]]
        np.testing.assert_array_equal(find_most_frequent_colwise(a), ([0, 0, 1]))

    def test_find_most_frequent_in_array(self):
        # A 0/1 tie resolves to 1; otherwise the majority value wins.
        a = [0, 1]
        self.assertEqual(find_most_frequent_in_array(a), (1))
        a = [0, 0, 1]
        self.assertEqual(find_most_frequent_in_array(a), (0))

    def test_filter_by_bit_freq(self):
        # A single row is returned untouched.
        a = pd.DataFrame({'col0' : [0], 'col1' : [0]})
        np.testing.assert_array_equal(filter_by_bit_freq(a),  [[0, 0]])
        # Identical rows survive both filtering modes.
        a = pd.DataFrame({'col0' : [0, 0], 'col1' : [0, 0]})
        np.testing.assert_array_equal(filter_by_bit_freq(a),  [[0, 0], [0, 0]])
        # Previously this repeated the default (keep_most_common=True), so the
        # least-common branch was never exercised.
        np.testing.assert_array_equal(filter_by_bit_freq(a, keep_most_common=False), [[0, 0], [0, 0]])
        a = pd.DataFrame({'col0' : [0, 0]})
        np.testing.assert_array_equal(filter_by_bit_freq(a),  [[0], [0]])
        # Rows that differ, so the two modes select different survivors.
        a = pd.DataFrame({'col0' : [0, 0, 1], 'col1' : [0, 1, 1]})
        np.testing.assert_array_equal(filter_by_bit_freq(a), [[0, 1]])
        np.testing.assert_array_equal(filter_by_bit_freq(a, keep_most_common=False), [[1, 1]])
        

# Run the tests inside the notebook: argv=[''] stops unittest from parsing the
# kernel's own command-line arguments, and exit=False keeps it from calling
# sys.exit() once the run has finished.
unittest.main(argv=[''], verbosity=3, exit=False)

test_change_1D_string_to_2D_int (__main__.Tests)
Test example input. ... ok
test_filter_by_bit_freq (__main__.Tests) ... ok
test_find_most_frequent_colwise (__main__.Tests) ... ok
test_find_most_frequent_in_array (__main__.Tests) ... ok

----------------------------------------------------------------------
Ran 4 tests in 0.025s

OK


<unittest.main.TestProgram at 0x116d2cb20>

In [3]:
# day 1a, version 1
# NOTE(review): the label says "day 1" but the data is read from input3.txt —
# confirm which puzzle day this cell belongs to.

# Load the report as strings (dtype='str') from <cwd>/inputs/input3.txt.
diagnostic_report = np.loadtxt("{0}/inputs/input3.txt".format(directory_path), dtype='str')

# Split every string into its digits: rows = report entries, columns = bit positions.
diagnostic_report_2D = change_1D_string_to_2D_int(diagnostic_report)

# gamma holds the most frequent bit of each column; epsilon is its bitwise inverse.
gamma = find_most_frequent_colwise(diagnostic_report_2D)
epsilon = np.logical_not(gamma).astype('int')

# Read both bit arrays as base-2 numbers.
gamma_decimal = convert_bin_array_2_decimal(gamma)
epsilon_decimal = convert_bin_array_2_decimal(epsilon)

print(gamma_decimal * epsilon_decimal)

2583164


In [4]:
# day 1a, version 2
# Same computation as version 1, but the per-column mode comes from pandas
# (DataFrame.mode) instead of np.bincount.

diagnostic_report = np.loadtxt("{0}/inputs/input3.txt".format(directory_path), dtype='str')
diagnostic_report_2D = change_1D_string_to_2D_int(diagnostic_report)
diagnostic_report_2D_df = pd.DataFrame(diagnostic_report_2D)
# The helper returns one row per mode; [0] keeps the first row only.
gamma = find_most_frequent_colwise_df(diagnostic_report_2D_df)[0]

# epsilon is the bitwise inverse of gamma.
epsilon = np.logical_not(gamma).astype('int') 
gamma_decimal = convert_bin_array_2_decimal(gamma)
epsilon_decimal = convert_bin_array_2_decimal(epsilon)

print(gamma_decimal * epsilon_decimal)


2583164


In [5]:
# day 1b
# Relies on diagnostic_report_2D_df built in the "version 2" cell above, so
# that cell has to run first.

# Filter the rows column by column, keeping the most common bit for the first
# rating and the least common bit for the second.
oxygen_generator = filter_by_bit_freq(diagnostic_report_2D_df)
scrubber_rating = filter_by_bit_freq(diagnostic_report_2D_df, keep_most_common=False)

# Read the surviving rows as base-2 numbers and multiply them.
oxygen_generator_decimal = convert_bin_array_2_decimal(oxygen_generator)
scrubber_rating_decimal = convert_bin_array_2_decimal(scrubber_rating)
print(oxygen_generator_decimal * scrubber_rating_decimal)

2784375
