In [125]:
import random
import base64
import re
import math
import heapq
from collections import Counter

In [126]:
class HuffmanNode:
    """Node of a Huffman tree.

    Leaves hold a symbol in ``char``; nodes created by merging two subtrees
    are built with ``char=None``. ``freq`` is the weight used for ordering,
    which lets nodes be stored directly in a ``heapq`` heap.
    """

    def __init__(self, char, freq):
        self.char, self.freq = char, freq
        # Children stay empty until the tree builder attaches subtrees.
        self.left = self.right = None

    def __lt__(self, other):
        """Order nodes by frequency only (the symbol is never compared)."""
        lighter = self.freq < other.freq
        return lighter

def build_huffman_tree(text):
    """Build a Huffman tree from the symbol frequencies of ``text``.

    Args:
        text: Sequence of hashable symbols (e.g. a string or a list of ints).

    Returns:
        The root ``HuffmanNode`` of the tree, or ``None`` when ``text`` is
        empty. Internal nodes have ``char=None``; leaves carry the symbol.

    Side effects:
        Prints the frequency table (a ``Counter``) to stdout.
    """
    if not text:
        return None

    freq_counter = Counter(text)
    # Diagnostic output: show the symbol frequencies.
    print(freq_counter)
    heap = [HuffmanNode(char, freq) for char, freq in freq_counter.items()]
    heapq.heapify(heap)

    if len(heap) == 1:
        # With a single distinct symbol the leaf itself would become the root
        # and receive the empty code, so the encoded text would be '' and the
        # message length would be lost. Hang the leaf on both branches of an
        # internal root instead: the symbol then gets a 1-bit code and either
        # bit decodes back to it.
        only_leaf = heap[0]
        root = HuffmanNode(None, only_leaf.freq)
        root.left = root.right = only_leaf
        return root

    # Repeatedly merge the two lightest subtrees until one tree remains.
    while len(heap) > 1:
        left = heapq.heappop(heap)
        right = heapq.heappop(heap)
        merged = HuffmanNode(None, left.freq + right.freq)
        merged.left = left
        merged.right = right
        heapq.heappush(heap, merged)

    return heap[0]

def build_huffman_codes(node, current_code, huffman_codes):
    """Record the bit string of every leaf below ``node`` in ``huffman_codes``.

    Args:
        node: Tree node exposing ``char``, ``left`` and ``right``; a node is
            treated as a leaf when ``char`` is not ``None``.
        current_code: Bit-string prefix accumulated on the way to ``node``.
        huffman_codes: Dict updated in place with ``symbol -> bit string``.

    Left edges contribute '0' and right edges '1'. Leaves are visited
    left-to-right, which fixes the insertion order of the dict.
    """
    pending = [(node, current_code)]
    while pending:
        branch, prefix = pending.pop()
        if branch.char is not None:
            huffman_codes[branch.char] = prefix
            continue
        # Stack is LIFO: push the right child first so the left subtree is
        # fully processed before the right one.
        pending.append((branch.right, prefix + '1'))
        pending.append((branch.left, prefix + '0'))


# Table entry used by the Shannon-Fano routines
class node:
    """One slot of the Shannon-Fano symbol table."""

    def __init__(self) -> None:
        self.sym = ''                          # symbol held by this slot
        self.pro = 0.0                         # probability or frequency of the symbol
        self.arr = [0 for _ in range(2000)]    # code bits, one int per position
        self.top = 0                           # index of the last bit in arr that is in use

# Shared working table: 2000 pre-allocated slots, one per symbol entry
p = [node() for _slot in range(2000)]
# Shannon-Fano code table: maps each symbol to its code as a string of '0'/'1'
shannon_codes = dict()

# Recursively assign Shannon-Fano code bits to a range of table entries
def shannon(l, h, p):
    """Assign Shannon-Fano code bits to the entries ``p[l]`` .. ``p[h]``.

    The range is split in two, the lower part gets bit 1 and the upper part
    bit 0 appended to each entry's ``arr`` (``top`` is the index of the last
    bit written), and both parts are processed recursively. Finished codes
    are stored in the module-level ``shannon_codes`` dict as
    ``symbol -> bit string``.

    Args:
        l: Index of the first entry of the range (inclusive).
        h: Index of the last entry of the range (inclusive).
        p: Table of entries exposing ``sym``, ``pro``, ``arr`` and ``top``.
            The caller in this file sorts it with ``sortByProbability`` and
            resets every ``top`` to -1 before the first call.

    Returns:
        None. ``p`` and ``shannon_codes`` are updated in place.
    """
    if l > h:
        return

    if l == h:
        # A range narrowed down to one entry: its code is complete and must
        # be recorded, otherwise the symbol is missing from shannon_codes and
        # encoding it later raises KeyError.
        entry = p[l]
        if entry.top < 0:
            # No bit written yet means the whole table is this one entry;
            # give it a 1-bit code so the message length survives encoding.
            entry.top = 0
            entry.arr[0] = 0
        shannon_codes[entry.sym] = ''.join(map(str, entry.arr[:entry.top + 1]))
        return

    if l + 1 == h:
        # Two entries left: the upper one takes 0, the lower one takes 1.
        p[h].top += 1
        p[h].arr[p[h].top] = 0
        p[l].top += 1
        p[l].arr[p[l].top] = 1
        shannon_codes[p[h].sym] = ''.join(map(str, p[h].arr[:p[h].top + 1]))
        shannon_codes[p[l].sym] = ''.join(map(str, p[l].arr[:p[l].top + 1]))
        return

    # Search for the split index: the lower part is p[l..split], the upper
    # part p[split+1..h]. Start with only p[h] in the upper part and move the
    # boundary down while the weight difference keeps strictly shrinking.
    split = h - 1
    best_diff = abs(sum(p[i].pro for i in range(l, h)) - p[h].pro)
    for k in range(h - 2, l - 1, -1):
        low_weight = sum(p[i].pro for i in range(l, k + 1))
        high_weight = sum(p[i].pro for i in range(h, k, -1))
        diff = abs(low_weight - high_weight)
        if diff >= best_diff:
            break
        best_diff = diff
        split = k

    for i in range(l, split + 1):
        p[i].top += 1
        p[i].arr[p[i].top] = 1
    for i in range(split + 1, h + 1):
        p[i].top += 1
        p[i].arr[p[i].top] = 0

    # Refine both halves
    shannon(l, split, p)
    shannon(split + 1, h, p)

# Sort the first n table entries by ascending probability or frequency
def sortByProbability(n, p):
    """Sort the first ``n`` entries of ``p`` in ascending order of ``pro``.

    Only the ``sym`` and ``pro`` fields move between slots; every other
    attribute of an entry (such as its code bits) stays with its slot.
    Entries with equal ``pro`` keep their relative order.

    Args:
        n: Number of leading entries of ``p`` to sort.
        p: Table of entries exposing ``sym`` and ``pro``; updated in place.
    """
    # sorted() is stable, so ties keep their original order.
    ranked = sorted(((p[i].pro, p[i].sym) for i in range(n)),
                    key=lambda pair: pair[0])
    for i, (pro, sym) in enumerate(ranked):
        p[i].pro = pro
        p[i].sym = sym
                
def decode_message(encoded_message):
    """Decode a Shannon-Fano bit string using the module-level ``shannon_codes``.

    Args:
        encoded_message: Iterable of '0'/'1' characters.

    Returns:
        The decoded symbols joined into one string. Trailing bits that do not
        complete a code are dropped.
    """
    # Invert the table once instead of scanning it for every bit. setdefault
    # keeps the first symbol in table order if two symbols share a code.
    symbol_for_code = {}
    for sym, sym_code in shannon_codes.items():
        symbol_for_code.setdefault(sym_code, sym)

    decoded_message = []
    code = ""
    for bit in encoded_message:
        code += bit
        if code in symbol_for_code:
            decoded_message.append(symbol_for_code[code])
            code = ""
    return ''.join(decoded_message)

# Print the table of symbols, probabilities and Shannon-Fano codes
def display(n, p):
    """Print the first ``n`` entries of ``p`` from the last index to the first.

    Each row shows the symbol, its probability rounded to 5 decimals, and
    the code bits ``arr[0] .. arr[top]``. Nothing is returned.
    """
    print("\n\n\n\tSymbol\tProbability\tCode", end='')
    for idx in reversed(range(n)):
        entry = p[idx]
        print("\n\t", entry.sym, "\t\t", round(entry.pro, 5), "\t", end='')
        bits = ''.join(str(entry.arr[pos]) for pos in range(entry.top + 1))
        print(bits, end='')

# Fill the table with each symbol occurrence and its relative frequency
def calculateProbabilities(p, symbols):
    """Store every element of ``symbols`` and its relative frequency in ``p``.

    Slot ``p[i]`` receives ``symbols[i]`` as ``sym`` and
    ``count(symbols[i]) / len(symbols)`` as ``pro``; a symbol that occurs
    several times therefore fills several slots.

    Args:
        p: Table of entries exposing ``sym`` and ``pro``; needs at least
            ``len(symbols)`` slots (indexing past the end raises IndexError).
        symbols: Sequence of hashable symbols, e.g. a string.

    Returns:
        List with the probability stored for each position of ``symbols``.
    """
    total = len(symbols)
    # Count every symbol once up front instead of rescanning the whole
    # sequence for each position.
    counts = Counter(symbols)
    probabilities = []
    for i, sym in enumerate(symbols):
        prob = counts[sym] / total
        p[i].sym = sym
        p[i].pro = prob
        probabilities.append(prob)
    return probabilities

In [129]:
def fuente_book(file_path):
    """Read a UTF-8 text file and return one binary string per character.

    Args:
        file_path: Path of the text file to read.

    Returns:
        List of bit strings, one per character, each the character's code
        point zero-padded to at least 8 bits. Returns an empty list (after
        printing a message) when the file does not exist.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            text = file.read()
    except FileNotFoundError:
        print(f"El archivo '{file_path}' no se encontró.")
        return []

    # Keep one packet per character. Code points above 255 need more than
    # 8 bits, so cutting a joined bit stream into fixed 8-bit chunks would
    # misalign that character and every packet after it.
    return [format(ord(char), '08b') for char in text]
    
def _encode_huffman(packets):
    """Huffman-encode ``packets``; returns (bit string, tree root, code table)."""
    root = build_huffman_tree(packets)
    if root is None:
        # Same 3-tuple shape as the normal path, so callers that unpack
        # three values also work for empty input.
        return "", None, {}

    huffman_codes = {}
    build_huffman_codes(root, '', huffman_codes)
    encoded_text = ''.join(huffman_codes[char] for char in packets)
    return encoded_text, root, huffman_codes


def _encode_shannon_fano(packets):
    """Shannon-Fano-encode ``packets`` using the module-level table ``p``."""
    decoded_text = ''.join(chr(int(char)) for char in packets)
    n = len(decoded_text)

    # Drop codes left over from an earlier run: a stale symbol that shares a
    # code with a current one could otherwise be picked when decoding.
    shannon_codes.clear()

    # Fills p[i].sym / p[i].pro for every character of the text
    calculateProbabilities(p, decoded_text)

    # Sorting the symbols based on their probability or frequency
    sortByProbability(n, p)

    # top = -1 marks "no code bits written yet" for every used slot
    for i in range(n):
        p[i].top = -1

    # Find the shannon code
    shannon(0, n - 1, p)
    display(n, p)

    # Encode the message
    return ''.join(shannon_codes[char] for char in decoded_text)


def _encode_move_to_front(packets):
    """Move-to-front transform of ``packets`` over the alphabet 0..255."""
    symbols = list(range(256))
    encoded_text = []
    for symbol in packets:
        symbol_index = symbols.index(symbol)
        encoded_text.append(symbol_index)
        # Move the symbol just seen to the front of the list
        del symbols[symbol_index]
        symbols.insert(0, symbol)
    return encoded_text


def _encode_run_length(packets):
    """Run-length-encode the characters of ``packets`` as (char, count) pairs."""
    decoded_text = ''.join(chr(int(char)) for char in packets)
    encoded_text = []
    for char in decoded_text:
        if encoded_text and encoded_text[-1][0] == char:
            # Same character as the current run: extend it
            encoded_text[-1] = (char, encoded_text[-1][1] + 1)
        else:
            encoded_text.append((char, 1))
    return encoded_text


def encode_book(packets, opcion):
    """Encode ``packets`` with the scheme selected by ``opcion``.

    Args:
        packets: Sequence of integer character codes.
        opcion: 1 = Huffman, 2 = Shannon-Fano, 3 = move-to-front,
            4 = run-length; any other value leaves the data unencoded.

    Returns:
        * 1: tuple ``(bit string, tree root, code table)``;
          ``("", None, {})`` for empty input.
        * 2: bit string (also prints the code table and rewrites the
          module-level ``shannon_codes``).
        * 3: list of move-to-front indices.
        * 4: list of ``(character, run length)`` tuples.
        * otherwise: ``packets`` itself.

    Raises:
        IndexError: option 2, when the text has more characters than the
            module-level table ``p`` has slots.
        ValueError: option 3, when a packet is not in 0..255.
    """
    if opcion == 1:
        return _encode_huffman(packets)
    if opcion == 2:
        return _encode_shannon_fano(packets)
    if opcion == 3:
        return _encode_move_to_front(packets)
    if opcion == 4:
        return _encode_run_length(packets)
    return packets
    
def ethernet_channel(packets):
    """Return ``packets`` unchanged (the same object that was passed in)."""
    received = packets
    return received

def decode_book(encoded_text, opcion, root):
    """Decode ``encoded_text`` with the scheme selected by ``opcion``.

    Args:
        encoded_text: Output of the matching encoder: a bit string for
            options 1 and 2, a list of indices for 3, a list of
            ``(character, run length)`` tuples for 4, and a sequence of
            integer character codes for any other option.
        opcion: 1 = Huffman, 2 = Shannon-Fano, 3 = move-to-front,
            4 = run-length; any other value means "not encoded".
        root: Huffman tree root (only used by option 1).

    Returns:
        The decoded text as a string, for every option.
    """
    if opcion == 1:
        if root is None:
            return ""

        decoded_chars = []
        current_node = root
        for bit in encoded_text:
            # '0' follows the left branch, anything else the right branch
            current_node = current_node.left if bit == '0' else current_node.right
            if current_node.char is not None:
                # Leaf reached: its symbol is a character code
                decoded_chars.append(chr(int(current_node.char)))
                current_node = root
        return ''.join(decoded_chars)

    if opcion == 2:
        return decode_message(encoded_text)

    if opcion == 3:
        symbols = list(range(256))
        decoded_symbols = []
        for symbol_index in encoded_text:
            symbol = symbols[symbol_index]
            decoded_symbols.append(symbol)
            # Mirror the encoder: move the symbol to the front of the list
            del symbols[symbol_index]
            symbols.insert(0, symbol)
        # Turn the character codes into text like every other branch does;
        # a list of ints would be written out as digits when saved.
        return ''.join(chr(symbol) for symbol in decoded_symbols)

    if opcion == 4:
        return ''.join(symbol * run_length for symbol, run_length in encoded_text)

    return ''.join(chr(int(char)) for char in encoded_text)

# Rebuild the book: write the decoded text to the output file
def reconstruct_book(decoded_text, output_file_path):
    """Write ``decoded_text`` to ``output_file_path`` as UTF-8 and report it.

    Args:
        decoded_text: Iterable of items; each one is converted with ``str``
            and the results are concatenated without a separator.
        output_file_path: Destination path; an existing file is overwritten.
    """
    content = ''.join(map(str, decoded_text))
    with open(output_file_path, 'w', encoding='utf-8') as out_file:
        out_file.write(content)
    print(f"Texto reconstruido guardado en '{output_file_path}'.")

def handler(packets, opcion):
    """Encode ``packets`` with scheme ``opcion``, decode the result, return it.

    Progress is printed along the way: option 1 shows the code table, the
    input, the encoded text and the decoded text; option 2 shows the encoded
    and decoded text; options 3 and 4 show the encoded data; any other
    option prints nothing.

    Args:
        packets: Sequence of integer character codes.
        opcion: Scheme selector passed on to ``encode_book``/``decode_book``.

    Returns:
        Whatever ``decode_book`` returns for the chosen option.
    """
    if opcion == 1:
        # The Huffman encoder also hands back the tree needed for decoding.
        encoded_text, root, huffman_codes = encode_book(packets, opcion)
        decoded_text = decode_book(encoded_text , opcion, root)
        print(
            f"Los códigos Huffman son: {huffman_codes}",
            f"Texto original: {packets}",
            f"Texto codificado: {encoded_text}",
            f"Texto decodificado: {decoded_text}",
            sep="\n",
        )
        return decoded_text

    # Every other scheme decodes without a tree.
    encoded_text = encode_book(packets, opcion)
    if opcion == 4:
        print(encoded_text)
    decoded_text = decode_book(encoded_text , opcion, None)
    if opcion == 2:
        print("\n\n shannon:",encoded_text)
        print("\n\n shannon:",decoded_text)
    elif opcion == 3:
        print(encoded_text)
    return decoded_text

In [131]:
# Input text and the file the round-tripped text is written to
input_file_path = "test_text.txt"
output_file_path = "test_text_reconstructed.txt"
# Encode the book in binary: read the file as a list of bit strings
packets = fuente_book(input_file_path)
# Convert every bit string to its integer value
ascii_text = [int(binary, 2) for binary in packets]
# Run the encode/decode round trip; option 1 selects the Huffman branch of handler
book = handler(ascii_text,1)
# Save the decoded text
reconstruct_book(book, output_file_path)

Counter({97: 5, 98: 2, 114: 2, 99: 1, 100: 1})
Los códigos Huffman son: {97: '0', 100: '100', 99: '101', 114: '110', 98: '111'}
Texto original: [97, 98, 114, 97, 99, 97, 100, 97, 98, 114, 97]
Texto codificado: 01111100101010001111100
Texto decodificado: abracadabra
Texto reconstruido guardado en 'test_text_reconstructed.txt'.
