In [1]:
import os
import json

# Encodes as a list of (char, count) tuples
def encodeString(stringVal):
    encodedList = []
    prevChar = None
    count = 0
    for char in stringVal:
        if prevChar != char and prevChar is not None:
            encodedList.append((prevChar, count))
            count = 0
        prevChar = char
        count = count + 1
    encodedList.append((prevChar, count))
    return encodedList

def decodeString(encodedList):
    decodedStr = ''
    for item in encodedList:
        try:
            decodedStr = decodedStr + item[0] * item[1]
        except:
            print(item)
    return decodedStr

In [2]:
def encodeFile(filename, newFilename):
    with open(filename) as f:
        data = encodeString(f.read())

    with open(newFilename, 'w') as f:
        f.write(json.dumps(data))

def decodeFile(filename):
    with open(filename) as f:
        data = f.read()
    return decodeString(json.loads(data))

print(f'Original file size: {os.path.getsize("10_04_challenge_art.txt")}')
encodeFile('10_04_challenge_art.txt', '10_04_challenge_art_encoded.txt')
print(f'New file size: {os.path.getsize("10_04_challenge_art_encoded.txt")}')
print(decodeFile('10_04_challenge_art_encoded.txt'))

Original file size: 1042
New file size: 10015

|1~ |80~
|1~ |80~
|1~ |31~%|19~ |30~
|1~ |24~%|33~ |23~
|1~ |20~%|8~ |25~%|8~ |19~
|1~ |16~%|7~ |35~%|6~ |16~
|1~ |14~%|6~ |41~%|6~ |13~
|1~ |11~%|6~ |47~%|5~ |11~
|1~ |10~%|5~ |51~%|5~ |9~
|1~ |8~%|5~ |55~%|5~ |7~
|1~ |7~%|4~ |17~%|5~ |14~%|5~ |18~%|4~ |6~
|1~ |6~%|4~ |17~%|7~ |12~%|7~ |18~%|4~ |5~
|1~ |5~%|4~ |18~%|7~ |12~%|7~ |19~%|4~ |4~
|1~ |4~%|4~ |19~%|7~ |12~%|7~ |20~%|4~ |3~
|1~ |4~%|4~ |20~%|5~ |14~%|5~ |21~%|4~ |3~
|1~ |3~%|4~ |67~%|4~ |2~
|1~ |3~%|4~ |67~%|4~ |2~
|1~ |3~%|4~ |67~%|4~ |2~
|1~ |3~%|4~ |54~%|4~ |8~%|4~ |3~
|1~ |4~%|4~ |7~%|6~ |40~%|5~ |7~%|4~ |3~
|1~ |4~%|4~ |9~%|4~ |39~%|4~ |8~%|4~ |4~
|1~ |5~%|4~ |9~%|4~ |37~%|4~ |9~%|4~ |4~
|1~ |6~%|4~ |9~%|5~ |34~%|4~ |9~%|4~ |5~
|1~ |7~%|5~ |9~%|5~ |29~%|5~ |9~%|5~ |6~
|1~ |8~%|5~ |10~%|6~ |24~%|5~ |10~%|4~ |8~
|1~ |10~%|5~ |11~%|7~ |15~%|7~ |11~%|5~ |9~
|1~ |12~%|5~ |13~%|21~ |13~%|5~ |11~
|1~ |14~%|7~ |40~%|5~ |14~
|1~ |17~%|7~ |33~%|7~ |16~
|1~ |21~%|9~ |21~%|9~ |20~
|1~ |

In [3]:
# [('A', 1), ('B', 80), ('C', 10)]
# becomes A|1~B|80~C|10
def encodeFile(filename, newFilename):
    with open(filename) as f:
        data = encodeString(f.read())

    data = [f'{char}|{count}' for char, count in data]
    
    with open(newFilename, 'w') as f:
        f.write('~'.join(data))


def decodeFile(filename):
    with open(filename) as f:
        data = f.read()
        
    pairs = data.split('~')
    pairs = [p.split('|') for p in pairs]
    pairs = [(p[0], int(p[1])) for p in pairs]
    return decodeString(pairs)

In [4]:
print(f'Original file size: {os.path.getsize("10_04_challenge_art.txt")}')
encodeFile('10_04_challenge_art.txt', '10_04_challenge_art_encoded.txt')
print(f'New file size: {os.path.getsize("10_04_challenge_art_encoded.txt")}')
print(decodeFile('10_04_challenge_art_encoded.txt'))

Original file size: 1042
New file size: 4026


ValueError: invalid literal for int() with base 10: ''

In [None]:
def encodeFile(filename, newFilename):
    with open(filename) as f:
        data = encodeString(f.read())
    output = bytearray()
    for item in data:
        # Character byte
        output.extend(bytes(item[0], 'utf-8'))
        # Integer count byte
        output.extend(item[1].to_bytes(1, 'big'))
    with open(newFilename, 'wb') as binary_file:
        # Write bytes to file
        binary_file.write(output)


def decodeFile(filename):
    with open(filename, 'rb') as f:
        data = f.read()
        # Split data into pairs 
        bytePairs = [data[i:i+2] for i in range(0, len(data), 2)]
        encodedList = []
        for bytePair in bytePairs:
            encodedList.append((bytePair[:1].decode('utf-8'), int.from_bytes(bytePair[1:], 'big')))
        return decodeString(encodedList)

In [None]:
print(f'Original file size: {os.path.getsize("10_04_challenge_art.txt")}')
encodeFile('10_04_challenge_art.txt', '10_04_challenge_art_encoded.aa')

In [None]:
print(f'New file size: {os.path.getsize("10_04_challenge_art_encoded.aa")}')
print(decodeFile('10_04_challenge_art_encoded.aa'))