# Code Written by:
**Shweta Tiwari**
*20 Oct 2023*

## Algorithm: LZW (Lempel-Ziv-Welch)

In [1]:
import time

# Algorithm

In [2]:
%%time
def lzw_encode(data):
    """Compress a bytes-like object with variable-width LZW.

    The dictionary starts with the 256 single-byte strings. The first code is
    written with 8 bits; the code width then grows by one bit every time the
    dictionary outgrows the current width. Codes are packed most-significant
    bit first and the final byte is padded with zero bits.

    Parameters
    ----------
    data : bytes-like
        ``bytes``, ``bytearray``, ``memoryview`` or any other object exposing
        the buffer protocol.

    Returns
    -------
    bytes
        The packed code stream (``b''`` for empty input).

    Raises
    ------
    TypeError
        If ``data`` does not support the buffer protocol (e.g. ``str``).
    """
    if not isinstance(data, bytes):
        # Dictionary keys must be hashable, so slices have to be real `bytes`.
        # Going through memoryview (rather than bytes(data) directly) keeps
        # str / int / list inputs a TypeError instead of silently accepting
        # them with a different meaning (bytes(5) would be five NUL bytes).
        data = bytes(memoryview(data))

    code, code_bits = {bytes([i]): i for i in range(256)}, 8
    buffer, buffer_bits = 0, 0
    index, size = 0, len(data)
    out = bytearray()

    while index < size:
        # Longest dictionary match starting at `index`. A single byte is
        # always present, so the match is at least one byte long.
        end = index + 1
        while end < size:
            candidate = data[index:end + 1]
            if candidate not in code:
                # First unseen extension: register it for future matches.
                code[candidate] = len(code)
                break
            end += 1
        word = data[index:end]

        # Append the code for the match to the bit buffer.
        buffer = (buffer << code_bits) | code[word]
        buffer_bits += code_bits

        # Widen subsequent codes once the dictionary exceeds the current
        # width. The check runs after writing, so the code just emitted still
        # used the old width (the decoder mirrors this ordering).
        if len(code) > 1 << code_bits:
            code_bits += 1

        index = end

        # At end of input, zero-pad to a whole number of bytes.
        if index >= size and buffer_bits % 8:
            pad = 8 - (buffer_bits % 8)
            buffer <<= pad
            buffer_bits += pad

        # Flush whenever the buffer happens to be byte-aligned.
        if buffer_bits % 8 == 0:
            out += buffer.to_bytes(buffer_bits >> 3, 'big')
            buffer, buffer_bits = 0, 0

    return bytes(out)

CPU times: user 17 µs, sys: 0 ns, total: 17 µs
Wall time: 19.8 µs


In [3]:
%%time
def lzw_decode(data):
    """Decompress a code stream produced by ``lzw_encode``.

    The stream is read most-significant bit first. The first code is 8 bits
    wide; the width grows by one bit each time the rebuilt dictionary reaches
    ``2 ** code_bits`` entries, which keeps it in step with the encoder.
    Fewer than ``code_bits`` leftover bits at the end are treated as padding.

    Parameters
    ----------
    data : bytes-like
        Sequence whose items are integers in ``range(256)``.

    Returns
    -------
    bytes
        The decompressed payload (``b''`` for empty input).

    Raises
    ------
    ValueError
        If the stream ends in the middle of a code, or contains a code that
        cannot exist at that point of the stream.
    """
    code, code_bits = {i: bytes([i]) for i in range(256)}, 8
    buffer, buffer_bits = 0, 0
    index, size = 0, len(data)
    out = bytearray()
    prefix = b''

    while index < size or buffer_bits >= code_bits:
        # Pull in whole bytes until one full code is available.
        while index < size and buffer_bits < code_bits:
            buffer = (buffer << 8) | data[index]
            buffer_bits += 8
            index += 1

        if buffer_bits < code_bits:
            # Unread bytes existed but did not complete a code. Without this
            # check the shift below fails with "negative shift count".
            raise ValueError(
                'truncated LZW stream: expected %d bits, only %d available'
                % (code_bits, buffer_bits)
            )

        # Split the next code off the top of the bit buffer.
        buffer_bits -= code_bits
        key = buffer >> buffer_bits
        buffer &= (1 << buffer_bits) - 1

        if key in code:
            word = code[key]
        elif prefix and key == len(code):
            # The encoder used the entry it had only just created, so the
            # word is the previous word plus its own first byte.
            word = prefix + prefix[:1]
        else:
            raise ValueError(
                'corrupt LZW stream: code %d is not defined (dictionary size %d)'
                % (key, len(code))
            )

        # Rebuild the entry the encoder added when it emitted `prefix`.
        if prefix:
            code[len(code)] = prefix + word[:1]
        prefix = word

        # Widen subsequent codes in step with the encoder.
        if len(code) >= 1 << code_bits:
            code_bits += 1

        out += word

    return bytes(out)

CPU times: user 4 µs, sys: 0 ns, total: 4 µs
Wall time: 6.91 µs


# Run

In [4]:
%%time
def compress(data):
    """Round-trip a text string through the LZW codec and print the sizes.

    The text is encoded as UTF-8 (byte-identical to ASCII for pure-ASCII
    input), compressed with ``lzw_encode``, decompressed with ``lzw_decode``
    and compared with the original.

    Parameters
    ----------
    data : str
        Text to compress.

    Raises
    ------
    AssertionError
        If the decoded text differs from ``data``.
    """
    raw = data.encode('utf-8')
    encoded = lzw_encode(raw)
    decoded = lzw_decode(encoded).decode('utf-8')
    assert data == decoded, 'LZW round trip did not reproduce the input'

    # Report the size in bytes of what was actually fed to the encoder;
    # for non-ASCII text this differs from the character count.
    print('compression', len(raw), '->', len(encoded), 'bytes')

CPU times: user 4 µs, sys: 0 ns, total: 4 µs
Wall time: 6.2 µs


In [5]:
%%time
# Example 1: short string over a four-letter alphabet (A, T, G, C).
compress('ATGATCATGAG')

compression 11 -> 9 bytes
CPU times: user 2.68 ms, sys: 0 ns, total: 2.68 ms
Wall time: 3.27 ms


In [6]:
%%time
# Example 2: one character repeated 1000 times.
compress('x' * 1000)

compression 1000 -> 51 bytes
CPU times: user 1.15 ms, sys: 1.02 ms, total: 2.17 ms
Wall time: 2.5 ms


In [7]:
%%time
# Example 3: short English text in which the same line appears twice.
compress("""
I wish that I knew what I know now
When I was younger.
I wish that I knew what I know now
When I was stronger.
""")

compression 112 -> 84 bytes
CPU times: user 262 µs, sys: 63 µs, total: 325 µs
Wall time: 328 µs


# The End