In [40]:
def read_file(file_path):
    """Return the full text of ``file_path``.

    If the file does not exist, an error message is printed and ``None``
    is returned instead of raising.
    """
    try:
        with open(file_path, 'r') as handle:
            text = handle.read()
    except FileNotFoundError:
        print(f"Error: The file '{file_path}' was not found.")
        text = None
    return text

def write_file(file_path, content):
    """Store ``content`` in ``file_path``.

    The file is opened in text write mode, so it is created when missing
    and any previous content is replaced.
    """
    with open(file_path, 'w') as sink:
        sink.write(content)

In [41]:
import math
def lzw_compression(data,output_path="output.txt"):
  """Compress ``data`` with LZW, starting from a 128-entry ASCII dictionary.

  Args:
    data: Text to compress. Every character must have a code point below
      128 because the initial dictionary only holds those symbols.
    output_path: File that receives ``str(codes)`` via ``write_file``.

  Returns:
    A ``(codes, compression_ratio)`` tuple. ``codes`` is the list of
    integer LZW codes. ``compression_ratio`` is the input size at 8 bits
    per character divided by the output size, where every code is stored
    with the fixed width needed for the largest emitted code.
    Empty (or ``None``) input returns ``([], 0)`` and writes nothing.

  Raises:
    ValueError: If ``data`` contains a character outside the ASCII range.
  """
  if not data:
    return [], 0

  # Validate up front: an unsupported symbol would otherwise surface as an
  # opaque KeyError in the middle of the encoding loop.
  for position, symbol in enumerate(data):
    if ord(symbol) >= 128:
      raise ValueError(
        f"Only ASCII input is supported; found {symbol!r} at index {position}."
      )

  # Initialize ASCII dictionary
  ascii_dict = {chr(i): i for i in range(128)}

  current_sequence = ""
  compressed = []
  next_code = 128

  # Compression process: grow the current match while it is known; on the
  # first unknown extension emit the match and register the extension.
  for next_char in data:
    new_sequence = current_sequence + next_char
    if new_sequence in ascii_dict:
      current_sequence = new_sequence
    else:
      compressed.append(ascii_dict[current_sequence])
      ascii_dict[new_sequence] = next_code
      next_code += 1
      current_sequence = next_char

  # Add the last sequence to the output (data is non-empty, so one exists).
  compressed.append(ascii_dict[current_sequence])

  # Calculate the compression ratio. The width is clamped to one bit: when
  # the largest code is 0 the logarithm yields 0 and the division would fail.
  bit_length = max(1, math.ceil(math.log2(max(compressed) + 1)))
  CR = (len(data) * 8) / (len(compressed) * bit_length)

  write_file(output_path, str(compressed))
  print(f"Encoded text written to '{output_path}'.")
  return compressed , CR


In [42]:
from collections import Counter
def calculate_compression_savings(original_data, compressed_data):
  """Return the percentage of bits saved by the compressed representation.

  The original is costed at 8 bits per symbol. The compressed stream is
  costed at a fixed width per code, sized for the largest code (at least
  one bit).

  Args:
    original_data: The uncompressed input (anything with a length).
    compressed_data: Sequence of non-negative integer codes.

  Returns:
    ``(1 - compressed_bits / original_bits) * 100``; negative when the
    output is larger than the input. Returns 0 when either argument is
    empty or ``None``, since no saving can be computed.
  """
  # Guard empties: they would otherwise divide by zero or call max() on
  # an empty sequence.
  if not original_data or not compressed_data:
    return 0

  original_bits = len(original_data) * 8
  # A code can never occupy fewer than one bit, even if the largest is 0.
  code_width = max(1, math.ceil(math.log2(max(compressed_data) + 1)))
  compressed_bits = len(compressed_data) * code_width
  return (1 - compressed_bits / original_bits) * 100

def calculate_average_bits_per_symbol(data, compressed):
  """Return the number of compressed bits spent per input symbol.

  Every code is costed at the fixed width needed for the largest code in
  ``compressed``. Returns 0 when either argument is empty or ``None``.
  """
  if data and compressed:
    code_width = math.ceil(math.log2(max(compressed) + 1))
    return (len(compressed) * code_width) / len(data)
  return 0  # No symbols or compression


def calculate_entropy(data):
  """Return the Shannon entropy of ``data`` in bits per symbol.

  Probabilities are the relative frequencies of the distinct symbols in
  ``data``. Empty input yields 0.
  """
  symbol_counts = Counter(data)
  total_symbols = len(data)

  weighted_logs = []
  for occurrences in symbol_counts.values():
    p = occurrences / total_symbols
    if p > 0:
      weighted_logs.append(p * math.log2(p))

  return -sum(weighted_logs)

def calculate_compression_efficiency(original_data, compressed_data):
  """Return coding efficiency as a percentage.

  Efficiency is the entropy of ``original_data`` (from
  ``calculate_entropy``) divided by the average bits per symbol actually
  spent (from ``calculate_average_bits_per_symbol``), times 100.

  Returns 0 when the average bits per symbol is 0, which the helper
  reports for empty input; this avoids a division by zero.
  """
  average_bits = calculate_average_bits_per_symbol(original_data, compressed_data)
  if not average_bits:
    return 0

  # This is the entropy of the source text, not of the compressed codes.
  source_entropy = calculate_entropy(original_data)
  return (source_entropy / average_bits) * 100



In [55]:
# Driver cell: compress a user-chosen file and report the LZW metrics.
file_path = input("Enter the file path you want to compress: ")
compression_data = read_file(file_path)

if not compression_data:
  # read_file returns None (after printing its own error) for a missing
  # file, and an empty file has nothing to encode. In both cases the
  # metric helpers below cannot be evaluated, so stop here.
  if compression_data is not None:
    print(f"Error: The file '{file_path}' is empty; nothing to compress.")
else:
  compressed , compression_ratio = lzw_compression(compression_data)
  savings = calculate_compression_savings(compression_data,compressed)
  avg_bits = calculate_average_bits_per_symbol(compression_data,compressed)
  entropy = calculate_entropy(compression_data)
  efficincy = calculate_compression_efficiency(compression_data,compressed)
  print("Tag :",compressed)
  print("the compression ratio:",compression_ratio)
  print("the compression entropy:", entropy)
  print("Compression Savings:", savings)
  print("Average Bits per Symbol:", avg_bits)
  print("Efficincy of the LZW algorithm:",efficincy)

Encoded text written to 'output.txt'.
Tag : [65, 66, 65, 128, 128, 129, 131, 134, 130, 129, 66, 138, 139, 138]
the compression ratio: 2.0
the compression entropy: 0.996316519558962
Compression Savings: 50.0
Average Bits per Symbol: 4.0
Efficincy of the LZW algorithm: 24.90791298897405


In [46]:
import ast
def lzw_decompress(compressed_data,output_path="input.txt"):
  """Decode a sequence of LZW codes produced over a 128-entry ASCII dictionary.

  Args:
    compressed_data: A list or tuple of integer codes, or the textual
      form of one (e.g. ``"[65, 66, 128]"``), which is parsed with
      ``ast.literal_eval``. The caller's sequence is never modified.
    output_path: File that receives the decoded text via ``write_file``.

  Returns:
    The decoded text. When there is nothing to decode (``None``, an empty
    sequence, or empty / ``"[]"`` text) an error message is printed, no
    file is written, and ``""`` is returned.

  Raises:
    ValueError: If the parsed text is not a list or tuple, or if a code
      cannot be resolved against the dictionary built so far.
  """
  # Parse textual input first so that an encoded empty list is caught by
  # the emptiness check below instead of failing later.
  if isinstance(compressed_data, str):
    text = compressed_data.strip()
    compressed_data = ast.literal_eval(text) if text else []
    if not isinstance(compressed_data, (list, tuple)):
      raise ValueError("Invalid compressed data.")

  # Work on a private copy so the caller's sequence stays intact; this
  # also lets tuples be passed directly.
  codes = list(compressed_data) if compressed_data else []
  if not codes:
    print("Error: No data to decompress.")
    return ""

  # Initialize ASCII dictionary
  ascii_dict = {i: chr(i) for i in range(128)}
  dict_size = 128

  # The first code must be a plain ASCII symbol; nothing else is defined yet.
  first_code = codes[0]
  if first_code not in ascii_dict:
    raise ValueError("Invalid compressed data.")
  pre_string = ascii_dict[first_code]
  decompressed_data = [pre_string]

  for code in codes[1:]:
    if code in ascii_dict:
      current = ascii_dict[code]
    elif code == dict_size:
      # The encoder used the entry it was just about to create: it is the
      # previous string extended by its own first character.
      current = pre_string + pre_string[0]
    else:
      raise ValueError("Invalid compressed data.")

    decompressed_data.append(current)

    # Add new sequence to the dictionary
    ascii_dict[dict_size] = pre_string + current[0]
    dict_size += 1

    pre_string = current

  decompressed_text = "".join(decompressed_data)
  write_file(output_path, decompressed_text)
  print(f"decompressed text written to '{output_path}'.")
  return decompressed_text

In [53]:
# Driver cell: decode a file that holds the textual list of LZW codes.
# Note: `file_path` is the same notebook-global name the compression cell uses.
file_path = input("Enter the file path you want to decompress: ")
# read_file returns None (after printing an error) when the file is missing.
data = read_file(file_path)
# The raw text is passed as-is; lzw_decompress parses string input itself and
# writes the decoded text to its default output path ("input.txt").
decompressed = lzw_decompress(data)
print("Decompressed Data:", decompressed)


decompressed text written to 'input.txt'.
Decompressed Data: ABAABABBAABAABAAAABABBBBBBBB
