In [4]:
def rle_compress(input) -> str:
    compressed = ""
    count = 1
    current = input[0]

    for i in range(len(input) + 1):
        if i < len(input) and input[i] == current:
            count += 1
        else:
            # Добавляем символ и количество (в виде символа)
            compressed += current
            compressed += chr(count + ord(' ')) # count = код печатных символов
            # Сброс для следующего символа
            if i < len(input):
                current = input[i]
                count = 1
    return compressed


# Функция для RLE-восстановления данных
def rle_decompress(compressed) -> str:
    decompressed = ""
    for i in range(0, len(compressed), 2):
        symbol = compressed[i]
        count = ord(compressed[i + 1]) - ord(' ') # Восстанавливаем число повторений
        decompressed += symbol * count
    return decompressed

In [6]:
original = "AAAABBBCCCDDDDEEEEE"
compressed = rle_compress(original)
decompressed = rle_decompress(compressed)

print(f"Original: {original}")
print(f"Compressed: {compressed}")
print(f"Decompressed: {decompressed}")
print(f"Compression ratio: {len(compressed) / len(original)}")

Original: AAAABBBCCCDDDDEEEEE
Compressed: A%B#C#D$E%
Decompressed: AAAAABBBCCCDDDDEEEEE
Compression ratio: 0.5263157894736842


In [21]:
'''struct Token {
    int offset; // Смещение назад
    int length; // Длина совпадения
    char nextChar; // Следующий символ
};'''

def lz77_compress(input, window_size):
    compressed = []
    i = 0

    while i < len(input):
        token = [0, 0, input[i]]
        max_offset = min(i, window_size)

        # Поиск наилучшего совпадения в окне
        for offset in range(1, max_offset + 1):
            length = 0
            while (i + length < len(input) and
                   input[i + length] == input[i - offset + length % offset]):
                length += 1
            if length > token[1]:
                token[0] =  offset
                token[1] = length
                token[2] = input[i + length] if i + length < len(input) else chr(0)

        compressed.append(token)
        i += token[1] + 1 # Перемещаемся за обработанную фразу

    return compressed;


def lz77_decompress(compressed):
    decompressed = ""

    for token in compressed:
        if token[0] == 0:
            decompressed += token[2]
        else:
            # Копируем фразу из уже распакованной части
            start = len(decompressed) - token[0]
            for i in range(token[1]):
                decompressed += decompressed[start + i % token[0]]

            decompressed += token[2]
    return decompressed

In [22]:
original = "ABRACADABRABRABRA"
window_size = 10

compressed = lz77_compress(original, window_size)
decompressed = lz77_decompress(compressed)

print(f"Original: {original}")
print(f"Compressed: {compressed}")
print(f"Decompressed: {decompressed}")
print(f"Compression ratio: {len(compressed) / len(original)}")

Original: ABRACADABRABRABRA
Compressed: [[0, 0, 'A'], [0, 0, 'B'], [0, 0, 'R'], [3, 1, 'C'], [2, 1, 'D'], [7, 4, 'B'], [3, 5, '\x00']]
Decompressed: ABRACADABRABRABRA 
Compression ratio: 0.4117647058823529


In [27]:
def lzw_compress(input):
    dictionary = {}
    compressed = []
    dict_size = 256

    # Инициализация словаря базовыми символами
    for i in range(dict_size):
        dictionary[chr(i)] = i
    current = ""

    for c in input:
        next = current + c
        if next in dictionary:
            current = next
        else:
            # Выводим код для current
            compressed.append(dictionary[current])
            # Добавляем новую комбинацию в словарь
            dictionary[next] = dict_size
            dict_size += 1
            current = c

    if current:
        compressed.append(dictionary[current])

    return compressed


def lzw_decompress(compressed):
    # Инициализация словаря
    dictionary = {}
    dict_size = 256
    for i in range(dict_size):
        dictionary[i] = chr(i)


    decompressed = ""

    if not compressed:
        return decompressed

    # Обрабатываем первый код
    old_code = compressed[0]
    current = dictionary[old_code]
    decompressed = current

    # Обрабатываем остальные коды
    for i in range(1, len(compressed)):
        new_code = compressed[i]
        entry = ""

        # Проверяем наличие кода в словаре
        if new_code in dictionary:
            entry = dictionary[new_code]
        elif new_code == dict_size:
            # Специальный случай для кода, который должен быть добавлен
            entry = current + current[0]
        else:
            raise Exception("Invalid compressed code: " + str(new_code))


        decompressed += entry

        # Добавляем новую комбинацию в словарь
        dictionary[dict_size] = current + entry[0];
        dict_size += 1

        current = entry

    return decompressed


In [28]:
original = "ABRACADABRABRABRA"

compressed = lzw_compress(original)
decompressed = lzw_decompress(compressed)

print(f"Original: {original}")
print(f"Compressed: {compressed}")
print(f"Decompressed: {decompressed}")
print(f"Compression ratio: {len(compressed) / len(original)}")

Original: ABRACADABRABRABRA
Compressed: [65, 66, 82, 65, 67, 65, 68, 256, 258, 257, 263, 65]
Decompressed: ABRACADABRABRABRA
Compression ratio: 0.7058823529411765
