In [1]:
import sys, os
import numpy as np
import pandas as pd
from utils.utils import read_txt, read_txt_np_int

# INPUT

In [185]:
inputfilename = './inputs/day9A.txt'

inputdata = read_txt(inputfilename)

In [186]:
compressed_files = inputdata[0]
len(compressed_files)

19999

## PART 1

In [46]:
def decompress_files(filedescription):
    decompressed = []
    for i in range(0, len(filedescription), 2):
        fileid = int(i/2)
        fileblocks = int(filedescription[i])
        decompressed += [str(fileid) for repeat in range(fileblocks)]
        try:
            emptyblocks = int(filedescription[i+1])
            decompressed += ['.' for repeat in range(emptyblocks)]
        except IndexError:
            pass
    return decompressed

def move_blocks(filedescription):
    moved_filedescription = filedescription.copy()
    empty_positions = list(np.where(filedescription == '.')[0])
    filled_positions = list(np.where(filedescription != '.')[0])
    empty_index, block_index = 0, 0
    if len(empty_positions) > 0:
        empty_index = empty_positions.pop(0)
        block_index = filled_positions.pop(-1)
    while empty_index < block_index:
        moved_filedescription[empty_index] = filedescription[block_index]
        moved_filedescription[block_index] = '.'
        block_index = filled_positions.pop(-1)
        if len(empty_positions) > 0:
            empty_index = empty_positions.pop(0)
        else:
            empty_index = block_index

    return moved_filedescription
    
def calc_checksum(filedescription):
    
    calc_filedescription = np.array(filedescription)
    calc_filedescription = calc_filedescription[calc_filedescription != '.'].astype(int)

    weight_array = np.array(range(len(calc_filedescription)))

    checksum = np.sum(calc_filedescription * weight_array)
    
    return checksum


In [48]:
test_files = '2333133121414131402'

uncompressed_files = np.array(decompress_files(compressed_files))
print(uncompressed_files)
final_files = move_blocks(uncompressed_files)
print(final_files)
calc_checksum(final_files)

['0' '0' '.' ... '9999' '9999' '9999']
['0' '0' '9999' ... '.' '.' '.']


np.int64(6307275788409)

## PART 2

In [205]:
def decompress_files_2(filedescription):
    decompressed = []
    empty_dict = {}
    position_so_far = 0
    for i in range(0, len(filedescription), 2):
        fileid = int(i/2)
        fileblocks = int(filedescription[i])
        decompressed += [(fileid, position_so_far, fileblocks)]
        position_so_far += fileblocks
        try:
            emptyblocks = int(filedescription[i+1])
            if emptyblocks:
                empty_dict[position_so_far] = emptyblocks
            position_so_far += emptyblocks
        except IndexError:
            pass
    return decompressed, empty_dict

def move_blocks_2(decompressed_files_2, decompressed_files, input_empty_dict):
    
    empty_dict = input_empty_dict.copy()
    for fileblock in decompressed_files_2[-1::-1]:
        block_position = fileblock[1]
        block_size = fileblock[2]
        file_id = fileblock[0]
        new_empty_space_size = 0

        for empty_space_position, empty_space_size in sorted(empty_dict.items()):
        
            # Only accept empty spaces that are left of the block
            if empty_space_position >= block_position:
                break
            # Only accept empty spaces that are larger than the block
            if empty_space_size >= block_size:
                # Adjust empty spaces
                empty_dict[empty_space_position] = 0
                new_empty_space_size = empty_space_size - block_size

                # Update dict
                if new_empty_space_size > 0:
                    empty_dict[empty_space_position + block_size] = new_empty_space_size
                
                # Move block
                decompressed_files[empty_space_position:empty_space_position+block_size] = decompressed_files[block_position:block_position+block_size]
                decompressed_files[block_position:block_position+block_size] = ['.' for repeat in range(block_size)]
    
                # Once we moved the block we're done
                break
                        
    return decompressed_files

def calc_checksum_2(filedescription):
    
    calc_filedescription = np.array(filedescription)
    calc_filedescription = np.where(calc_filedescription == '.', '0', calc_filedescription).astype(int)

    weight_array = np.array(range(len(calc_filedescription)))

    checksum = np.sum(calc_filedescription * weight_array)
    
    return calc_filedescription, checksum


In [212]:
# Solve part 2 - compressed_files, test_files
uncompressed_files_2, empty_dict = decompress_files_2(compressed_files)
uncompressed_files = decompress_files(compressed_files)
final_uncompressed_files = move_blocks_2(uncompressed_files_2, uncompressed_files, empty_dict)

calc_filedescription, checksum = calc_checksum_2(final_uncompressed_files)

print(checksum)

6327174563252


In [211]:
list(calc_filedescription)

[np.int64(0),
 np.int64(0),
 np.int64(9997),
 np.int64(9997),
 np.int64(9997),
 np.int64(1),
 np.int64(1),
 np.int64(1),
 np.int64(1),
 np.int64(1),
 np.int64(1),
 np.int64(1),
 np.int64(9999),
 np.int64(9999),
 np.int64(9999),
 np.int64(9999),
 np.int64(9999),
 np.int64(9999),
 np.int64(9999),
 np.int64(9999),
 np.int64(2),
 np.int64(2),
 np.int64(2),
 np.int64(2),
 np.int64(2),
 np.int64(2),
 np.int64(2),
 np.int64(2),
 np.int64(2),
 np.int64(9998),
 np.int64(9998),
 np.int64(9998),
 np.int64(9998),
 np.int64(9998),
 np.int64(9998),
 np.int64(9996),
 np.int64(9992),
 np.int64(3),
 np.int64(3),
 np.int64(3),
 np.int64(3),
 np.int64(9995),
 np.int64(9995),
 np.int64(9995),
 np.int64(9995),
 np.int64(9995),
 np.int64(4),
 np.int64(4),
 np.int64(4),
 np.int64(4),
 np.int64(4),
 np.int64(4),
 np.int64(4),
 np.int64(4),
 np.int64(5),
 np.int64(9994),
 np.int64(9994),
 np.int64(6),
 np.int64(9993),
 np.int64(9993),
 np.int64(9989),
 np.int64(9985),
 np.int64(9985),
 np.int64(7),
 np.int64(7

In [203]:
uncompressed_files

['0',
 '0',
 '1',
 '1',
 '1',
 '.',
 '.',
 '.',
 '2',
 '4',
 '.',
 '5',
 '5',
 '5',
 '5',
 '.',
 '6',
 '6',
 '6',
 '6',
 '.',
 '7',
 '7',
 '7',
 '.',
 '8',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.'

In [209]:
final_uncompressed_files

['0',
 '0',
 '1',
 '1',
 '1',
 '.',
 '.',
 '.',
 '2',
 '4',
 '.',
 '5',
 '5',
 '5',
 '5',
 '.',
 '6',
 '6',
 '6',
 '6',
 '.',
 '7',
 '7',
 '7',
 '.',
 '8',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.',
 '.'

In [195]:
np.where(final_uncompressed_files == '.', '0', final_uncompressed_files)

array(['0', '0', '1', ..., '.', '.', '.'], shape=(23498,), dtype='<U1')