<a href="https://colab.research.google.com/github/SherinJA/LZSS---Data-Compression-Algorithm/blob/master/LZSS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from tabulate import tabulate

# LZSS Compression
def lzss_compress(text, search_size=7, lookahead_size=5):
    """Compress ``text`` with LZSS and print a step-by-step trace table.

    The encoder keeps a search buffer with the most recent ``search_size``
    encoded characters and a lookahead buffer with the next
    ``lookahead_size`` characters to encode. At every step the longest prefix
    of the lookahead buffer that lies entirely inside the search buffer is
    emitted as a pointer; if nothing matches, a single literal is emitted.
    On ties the earliest (farthest back) match wins.

    Args:
        text: String to compress.
        search_size: Maximum search-buffer length. ``0`` disables matching,
            so every character is emitted as a literal.
        lookahead_size: Maximum lookahead-buffer length; also caps the
            match length.

    Returns:
        A list of tokens, each either ``(0, char)`` for a literal or
        ``(1, (offset, length))`` for a pointer, where ``offset`` counts
        backwards from the end of the search buffer.

    Raises:
        ValueError: If ``lookahead_size`` is below 1 or ``search_size`` is
            negative.
    """
    # A non-positive lookahead would yield an empty token list for non-empty
    # input, i.e. silent data loss, so reject it up front.
    if lookahead_size < 1:
        raise ValueError("lookahead_size must be at least 1")
    if search_size < 0:
        raise ValueError("search_size must be non-negative")

    search_buffer = ""
    lookahead_buffer = text[:lookahead_size]
    pos = len(lookahead_buffer)  # index of the next unread character of text
    output = []
    steps = []

    while lookahead_buffer:
        # Find the longest match that lies entirely inside the search buffer
        longest_match_len = 0
        longest_match_offset = 0
        for start in range(len(search_buffer)):
            match_len = 0
            while (match_len < len(lookahead_buffer)
                   and start + match_len < len(search_buffer)
                   and search_buffer[start + match_len] == lookahead_buffer[match_len]):
                match_len += 1

            # Strict ">" keeps the first longest match found
            if match_len > longest_match_len:
                longest_match_len = match_len
                longest_match_offset = len(search_buffer) - start  # Offset from end

        # Any match of one or more characters becomes a pointer; otherwise
        # fall back to a single literal character.
        if longest_match_len >= 1:
            token = (1, (longest_match_offset, longest_match_len))
            consumed = longest_match_len
        else:
            token = (0, lookahead_buffer[0])
            consumed = 1
        output.append(token)

        # Record step (buffers as they were before this token was consumed)
        flag, payload = token
        steps.append([len(steps), search_buffer, lookahead_buffer,
                      f"({flag}, {repr(payload) if flag == 0 else payload})"])

        # Slide the window. s[-0:] is the whole string, so a zero-sized
        # window must be handled explicitly instead of by slicing.
        search_buffer += lookahead_buffer[:consumed]
        search_buffer = search_buffer[-search_size:] if search_size > 0 else ""

        # Drop the consumed characters and refill from the remaining text
        lookahead_buffer = lookahead_buffer[consumed:]
        refill_length = min(consumed, len(text) - pos)
        if refill_length > 0:
            lookahead_buffer += text[pos:pos + refill_length]
            pos += refill_length

    print("Compression Steps:")
    print(tabulate(steps, headers=["Step", "Search Buffer", "Lookahead Buffer", "Output"],
                   tablefmt="grid", stralign="left"))
    return output

# LZSS Decompression
def lzss_decompress(compressed, search_size=7):
    """Rebuild a string from LZSS tokens and print a step-by-step trace table.

    Args:
        compressed: Iterable of ``(flag, value)`` tokens. ``flag == 0`` means
            ``value`` is literal text; any other flag means ``value`` is an
            ``(offset, length)`` pointer counted back from the end of the
            sliding window.
        search_size: Maximum number of trailing characters kept in the
            sliding window between tokens.

    Returns:
        The decoded string.
    """
    window = ""
    result = ""
    trace = []

    for step, (flag, value) in enumerate(compressed):
        if flag == 0:
            # Literal: the token carries the text itself
            piece = value
        else:
            # Pointer: copy `length` characters starting `offset` back
            offset, length = value
            piece = ""
            for k in range(length):
                if k >= offset:
                    # Ran past the window's end: continue from the text this
                    # same pointer has already produced
                    piece += piece[k - offset]
                else:
                    piece += window[len(window) - offset + k]

        window += piece
        result += piece
        if len(window) > search_size:
            window = window[-search_size:]

        shown = repr(value) if flag == 0 else value
        trace.append([step, window, f"({flag}, {shown})", result])

    print("\nDecompression Steps:")
    table = tabulate(trace, headers=["Step", "Buffer", "Input", "Output"],
                     tablefmt="grid", stralign="left")
    print(table)
    return result

# Calculate size of original and compressed data
def calculate_compression_stats(original_text, compressed_data,
                                offset_bits=None, length_bits=None):
    """Estimate the encoded size of an LZSS token stream.

    Cost model: the original text costs one byte per character; a literal
    token costs 1 flag bit + 8 character bits; a pointer token costs 1 flag
    bit + ``offset_bits`` + ``length_bits``.

    Args:
        original_text: The uncompressed string.
        compressed_data: Iterable of ``(flag, value)`` tokens; ``flag == 0``
            is a literal, anything else is an ``(offset, length)`` pointer.
        offset_bits: Width of the pointer offset field. ``None`` (default)
            uses 3 bits, widened only if some offset does not fit in 3 bits.
        length_bits: Width of the pointer length field. ``None`` (default)
            uses 3 bits, widened only if some length does not fit in 3 bits.

    Returns:
        A dict with ``original_size_bytes``, ``compressed_size_bits``,
        ``compressed_size_bytes``, ``compression_ratio`` and
        ``space_savings_percentage``.
    """
    default_field_bits = 3  # enough for values 0-7, the default buffer sizes

    tokens = list(compressed_data)
    pointers = [value for flag, value in tokens if flag != 0]
    literal_count = len(tokens) - len(pointers)

    # A fixed 3-bit field cannot represent values above 7, so when the width
    # is not given explicitly grow it to fit the largest value actually used.
    if offset_bits is None:
        largest_offset = max((offset for offset, _ in pointers), default=0)
        offset_bits = max(default_field_bits, largest_offset.bit_length())
    if length_bits is None:
        largest_length = max((length for _, length in pointers), default=0)
        length_bits = max(default_field_bits, largest_length.bit_length())

    # Calculate original size (1 byte per character)
    original_size = len(original_text)

    # Literal: 1 flag bit + 8 bits; pointer: 1 flag bit + offset + length
    compressed_size = (literal_count * (1 + 8)
                       + len(pointers) * (1 + offset_bits + length_bits))

    # Convert bits to bytes (round up to nearest byte)
    compressed_size_bytes = (compressed_size + 7) // 8

    # Guard both ratios against division by zero on empty input
    compression_ratio = original_size / compressed_size_bytes if compressed_size_bytes > 0 else 0
    space_savings = (1 - (compressed_size_bytes / original_size)) * 100 if original_size > 0 else 0

    return {
        "original_size_bytes": original_size,
        "compressed_size_bits": compressed_size,
        "compressed_size_bytes": compressed_size_bytes,
        "compression_ratio": compression_ratio,
        "space_savings_percentage": space_savings
    }

# Test with the example string
text = "abracadabracabra"
print(f"Original string: {text}\n")

# Compress, then immediately decompress the result
compressed = lzss_compress(text)
print(f"\nCompressed output: {compressed}")

decompressed = lzss_decompress(compressed)
print(f"\nDecompressed string: {decompressed}")

# Round-trip check: the decoded text must equal the input
print(
    "\nDecompression successful! Original and decompressed strings match."
    if text == decompressed
    else "\nError: Decompression failed. Strings do not match."
)

# Size report under the bit-cost model of calculate_compression_stats
stats = calculate_compression_stats(text, compressed)
print(
    "\nCompression Statistics:",
    "-----------------------",
    f"Original size: {stats['original_size_bytes']} bytes",
    f"Compressed size: {stats['compressed_size_bits']} bits ({stats['compressed_size_bytes']} bytes)",
    f"Compression ratio: {stats['compression_ratio']:.2f}:1",
    f"Space savings: {stats['space_savings_percentage']:.2f}%",
    sep="\n",
)

Original string: abracadabracabra

Compression Steps:
+--------+-----------------+--------------------+-------------+
|   Step | Search Buffer   | Lookahead Buffer   | Output      |
|      0 |                 | abrac              | (0, 'a')    |
+--------+-----------------+--------------------+-------------+
|      1 | a               | braca              | (0, 'b')    |
+--------+-----------------+--------------------+-------------+
|      2 | ab              | racad              | (0, 'r')    |
+--------+-----------------+--------------------+-------------+
|      3 | abr             | acada              | (1, (3, 1)) |
+--------+-----------------+--------------------+-------------+
|      4 | abra            | cadab              | (0, 'c')    |
+--------+-----------------+--------------------+-------------+
|      5 | abrac           | adabr              | (1, (5, 1)) |
+--------+-----------------+--------------------+-------------+
|      6 | abraca          | dabra              | 

In [None]:
import streamlit as st
from tabulate import tabulate

# LZSS Compression Function (copied from the original script)
def lzss_compress(text, search_size=7, lookahead_size=5):
    """Compress ``text`` with LZSS and return the tokens plus a step trace.

    The encoder keeps a search buffer with the most recent ``search_size``
    encoded characters and a lookahead buffer with the next
    ``lookahead_size`` characters to encode. At every step the longest prefix
    of the lookahead buffer that lies entirely inside the search buffer is
    emitted as a pointer; if nothing matches, a single literal is emitted.
    On ties the earliest (farthest back) match wins.

    Args:
        text: String to compress.
        search_size: Maximum search-buffer length. ``0`` disables matching,
            so every character is emitted as a literal.
        lookahead_size: Maximum lookahead-buffer length; also caps the
            match length.

    Returns:
        ``(output, steps)``. ``output`` is a list of tokens, each either
        ``(0, char)`` for a literal or ``(1, (offset, length))`` for a
        pointer whose ``offset`` counts backwards from the end of the search
        buffer. ``steps`` has one row per token:
        ``[step index, search buffer, lookahead buffer, token text]``.

    Raises:
        ValueError: If ``lookahead_size`` is below 1 or ``search_size`` is
            negative.
    """
    # A non-positive lookahead would yield an empty token list for non-empty
    # input, i.e. silent data loss, so reject it up front.
    if lookahead_size < 1:
        raise ValueError("lookahead_size must be at least 1")
    if search_size < 0:
        raise ValueError("search_size must be non-negative")

    search_buffer = ""
    lookahead_buffer = text[:lookahead_size]
    pos = len(lookahead_buffer)  # index of the next unread character of text
    output = []
    steps = []

    while lookahead_buffer:
        # Find the longest match that lies entirely inside the search buffer
        longest_match_len = 0
        longest_match_offset = 0
        for start in range(len(search_buffer)):
            match_len = 0
            while (match_len < len(lookahead_buffer)
                   and start + match_len < len(search_buffer)
                   and search_buffer[start + match_len] == lookahead_buffer[match_len]):
                match_len += 1

            # Strict ">" keeps the first longest match found
            if match_len > longest_match_len:
                longest_match_len = match_len
                longest_match_offset = len(search_buffer) - start  # Offset from end

        # Any match of one or more characters becomes a pointer; otherwise
        # fall back to a single literal character.
        if longest_match_len >= 1:
            token = (1, (longest_match_offset, longest_match_len))
            consumed = longest_match_len
        else:
            token = (0, lookahead_buffer[0])
            consumed = 1
        output.append(token)

        # Record step (buffers as they were before this token was consumed)
        flag, payload = token
        steps.append([len(steps), search_buffer, lookahead_buffer,
                      f"({flag}, {repr(payload) if flag == 0 else payload})"])

        # Slide the window. s[-0:] is the whole string, so a zero-sized
        # window must be handled explicitly instead of by slicing.
        search_buffer += lookahead_buffer[:consumed]
        search_buffer = search_buffer[-search_size:] if search_size > 0 else ""

        # Drop the consumed characters and refill from the remaining text
        lookahead_buffer = lookahead_buffer[consumed:]
        refill_length = min(consumed, len(text) - pos)
        if refill_length > 0:
            lookahead_buffer += text[pos:pos + refill_length]
            pos += refill_length

    return output, steps

# LZSS Decompression Function
def lzss_decompress(compressed, search_size=7):
    """Rebuild a string from LZSS tokens and return it with a step trace.

    Args:
        compressed: Iterable of ``(flag, value)`` tokens. ``flag == 0`` means
            ``value`` is literal text; any other flag means ``value`` is an
            ``(offset, length)`` pointer counted back from the end of the
            sliding window.
        search_size: Maximum number of trailing characters kept in the
            sliding window between tokens.

    Returns:
        ``(output, steps)`` where ``output`` is the decoded string and
        ``steps`` has one row per token:
        ``[step index, window after the token, token text, output so far]``.
    """
    window = ""
    result = ""
    trace = []

    for step, (flag, value) in enumerate(compressed):
        if flag == 0:
            # Literal: the token carries the text itself
            piece = value
        else:
            # Pointer: copy `length` characters starting `offset` back
            offset, length = value
            piece = ""
            for k in range(length):
                if k >= offset:
                    # Ran past the window's end: continue from the text this
                    # same pointer has already produced
                    piece += piece[k - offset]
                else:
                    piece += window[len(window) - offset + k]

        window += piece
        result += piece
        if len(window) > search_size:
            window = window[-search_size:]

        shown = repr(value) if flag == 0 else value
        trace.append([step, window, f"({flag}, {shown})", result])

    return result, trace

# Calculate compression statistics
def calculate_compression_stats(original_text, compressed_data,
                                offset_bits=None, length_bits=None):
    """Estimate the encoded size of an LZSS token stream.

    Cost model: the original text costs one byte per character; a literal
    token costs 1 flag bit + 8 character bits; a pointer token costs 1 flag
    bit + ``offset_bits`` + ``length_bits``.

    Args:
        original_text: The uncompressed string.
        compressed_data: Iterable of ``(flag, value)`` tokens; ``flag == 0``
            is a literal, anything else is an ``(offset, length)`` pointer.
        offset_bits: Width of the pointer offset field. ``None`` (default)
            uses 3 bits, widened only if some offset does not fit in 3 bits.
        length_bits: Width of the pointer length field. ``None`` (default)
            uses 3 bits, widened only if some length does not fit in 3 bits.

    Returns:
        A dict with ``original_size_bytes``, ``compressed_size_bits``,
        ``compressed_size_bytes``, ``compression_ratio`` and
        ``space_savings_percentage``.
    """
    default_field_bits = 3  # enough for values 0-7, the default buffer sizes

    tokens = list(compressed_data)
    pointers = [value for flag, value in tokens if flag != 0]
    literal_count = len(tokens) - len(pointers)

    # A fixed 3-bit field cannot represent values above 7 (the buffer sizes
    # are configurable), so when the width is not given explicitly grow it
    # to fit the largest value actually used.
    if offset_bits is None:
        largest_offset = max((offset for offset, _ in pointers), default=0)
        offset_bits = max(default_field_bits, largest_offset.bit_length())
    if length_bits is None:
        largest_length = max((length for _, length in pointers), default=0)
        length_bits = max(default_field_bits, largest_length.bit_length())

    # Calculate original size (1 byte per character)
    original_size = len(original_text)

    # Literal: 1 flag bit + 8 bits; pointer: 1 flag bit + offset + length
    compressed_size = (literal_count * (1 + 8)
                       + len(pointers) * (1 + offset_bits + length_bits))

    # Convert bits to bytes (round up to nearest byte)
    compressed_size_bytes = (compressed_size + 7) // 8

    # Guard both ratios against division by zero on empty input
    compression_ratio = original_size / compressed_size_bytes if compressed_size_bytes > 0 else 0
    space_savings = (1 - (compressed_size_bytes / original_size)) * 100 if original_size > 0 else 0

    return {
        "original_size_bytes": original_size,
        "compressed_size_bits": compressed_size,
        "compressed_size_bytes": compressed_size_bytes,
        "compression_ratio": compression_ratio,
        "space_savings_percentage": space_savings
    }

# Streamlit App
def main():
    """Render the Streamlit page for the LZSS demonstration.

    Reads the input text and both buffer sizes from widgets. When the
    "Compress" button is pressed it compresses the text, decompresses the
    result to verify the round trip, and displays the tokens, the decoded
    text, size statistics and the per-step traces of both directions.
    """
    st.title("LZSS Compression Demonstration")

    # Input section
    st.header("Input")
    input_text = st.text_input("Enter text to compress:", "abracadabracabra")
    search_size = st.slider("Search Buffer Size", min_value=1, max_value=15, value=7)
    lookahead_size = st.slider("Lookahead Buffer Size", min_value=1, max_value=10, value=5)

    # Compression button
    if st.button("Compress"):
        # Perform compression
        compressed, compression_steps = lzss_compress(input_text, search_size, lookahead_size)

        # Decompress to verify. The decoder must use the same window size as
        # the encoder: pointer offsets can reach back search_size characters,
        # so decoding with the default window of 7 breaks for larger sizes.
        decompressed, decompression_steps = lzss_decompress(compressed, search_size)

        # Calculate statistics
        stats = calculate_compression_stats(input_text, compressed)

        # Display results
        st.header("Compression Results")

        # Original Text
        st.subheader("Original Text")
        st.text(input_text)

        # Compressed Output
        st.subheader("Compressed Output")
        st.write(compressed)

        # Decompressed Text
        st.subheader("Decompressed Text")
        st.text(decompressed)

        # Verification
        if input_text == decompressed:
            st.success("Decompression successful! Original and decompressed strings match.")
        else:
            st.error("Error: Decompression failed. Strings do not match.")

        # Compression Statistics
        st.subheader("Compression Statistics")
        st.write(f"Original size: {stats['original_size_bytes']} bytes")
        st.write(f"Compressed size: {stats['compressed_size_bits']} bits ({stats['compressed_size_bytes']} bytes)")
        st.write(f"Compression ratio: {stats['compression_ratio']:.2f}:1")
        st.write(f"Space savings: {stats['space_savings_percentage']:.2f}%")

        # Compression Steps (Detailed View)
        st.subheader("Compression Steps")
        compression_steps_df = pd.DataFrame(compression_steps,
                                            columns=["Step", "Search Buffer", "Lookahead Buffer", "Output"])
        st.dataframe(compression_steps_df)

        # Decompression Steps (Detailed View)
        st.subheader("Decompression Steps")
        decompression_steps_df = pd.DataFrame(decompression_steps,
                                              columns=["Step", "Buffer", "Input", "Output"])
        st.dataframe(decompression_steps_df)

# Additional imports for Streamlit
import pandas as pd

# Run the Streamlit app.
# NOTE: when this cell is executed inside the notebook the guard is true, so
# main() is called directly; the captured output shows Streamlit warning that
# the script is being run without `streamlit run`.
if __name__ == "__main__":
    main()

2025-03-26 04:01:04.095 
  command:

    streamlit run /usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2025-03-26 04:01:04.105 Session state does not function when running a script without `streamlit run`


In [None]:
# Install required libraries (%pip installs into the running kernel's environment)
%pip install streamlit tabulate pandas

# Save the Streamlit app to a file
# NOTE(review): the body written below is still a placeholder comment, so the
# launched app is empty until the real app source is pasted in.
with open('lzss_compression_app.py', 'w') as f:
    f.write('''
# [Paste the entire contents of the artifact here]
''')

# Run the Streamlit app in the background and expose its port through localtunnel.
# The port has to be passed with --port; a bare positional "8501" makes
# localtunnel print its usage text and exit without opening a tunnel.
!streamlit run lzss_compression_app.py & npx localtunnel --port 8501

y

Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[1G[0K⠙[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.86.108.187:8501[0m
[0m
[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K[1G[0JNeed to install the following packages:
localtunnel@2.0.2
Ok to proceed? (y) [20Gy

[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0KUsage: lt --port [num] <options>

Options:
  -p, --port                Internal HTTP server port                 [required]
  -h, --host                Upstream server providing forwarding
                                             [default: "https://localtunnel.me"]
  -s, --subdomain           Request this subdomain
  -l, --local-host          Tunnel traff

In [None]:
# Install Streamlit with pip; -q keeps the installer output quiet.
!pip install -q streamlit

10.1.66.247
10.1.66.247


In [None]:
# Install the localtunnel package with npm.
!npm install localtunnel

[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K
added 22 packages in 863ms
[1G[0K⠦[1G[0K
[1G[0K⠦[1G[0K3 packages are looking for funding
[1G[0K⠦[1G[0K  run `npm fund` for details
[1G[0K⠦[1G[0K

In [None]:
# Fetch ipv4.icanhazip.com and print the response (this runtime's public IPv4
# address) to stdout; -q silences wget's progress output.
# NOTE(review): presumably the address is needed to get through the
# localtunnel landing page - confirm.
!wget -q -O - ipv4.icanhazip.com

34.86.108.187
