In [1]:
import random
import base64
import re
import math
import heapq
from collections import Counter

class HuffmanNode:
    def __init__(self, char, freq):
        self.char = char
        self.freq = freq
        self.left = None
        self.right = None

    def __lt__(self, other):
        return self.freq < other.freq

def encode_book(file_path):
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            text = file.read()
            binary_text = ''.join(format(ord(char), '08b') for char in text)
            packed_binary_text = [binary_text[i:i+8] for i in range(0, len(binary_text), 8)]
        return packed_binary_text
    except FileNotFoundError:
        print(f"El archivo '{file_path}' no se encontró.")
        return []
    
def ethernet_channel(packets):
    return packets

def build_huffman_tree(text):
    if not text:
        return None 

    freq_counter = Counter(text)
    print(freq_counter)
    heap = [HuffmanNode(char, freq) for char, freq in freq_counter.items()]
    heapq.heapify(heap)

    while len(heap) > 1:
        left = heapq.heappop(heap)
        right = heapq.heappop(heap)
        merged = HuffmanNode(None, left.freq + right.freq)
        merged.left = left
        merged.right = right
        heapq.heappush(heap, merged)

    return heap[0]

def build_huffman_codes(node, current_code, huffman_codes):
    if node.char is not None:
        huffman_codes[node.char] = current_code
        return
    build_huffman_codes(node.left, current_code + '0', huffman_codes)
    build_huffman_codes(node.right, current_code + '1', huffman_codes)

def huffman_encode(text):
    root = build_huffman_tree(text)
    if root is None:
        return "", None

    huffman_codes = {}
    build_huffman_codes(root, '', huffman_codes)
    encoded_text = ''.join(huffman_codes[char] for char in text)
    return encoded_text, root, huffman_codes

def huffman_decode(encoded_text, root):
    if root is None:
        return ""

    decoded_text = []
    current_node = root
    for bit in encoded_text:
        if bit == '0':
            current_node = current_node.left
        else:
            current_node = current_node.right

        if current_node.char is not None:
            decoded_text.append(str(current_node.char))
            current_node = root

    # Convertir los códigos ASCII a caracteres y unirlos en una cadena
    decoded_text = ''.join(chr(int(char)) for char in decoded_text)

    return decoded_text

def decode_packets(packed_binary_text):
    decoded_text = ''.join(chr(int(byte, 2)) for byte in packed_binary_text)
    return decoded_text

# Función para reconstruir el libro con los cambios solicitados
def reconstruct_book(decoded_text, output_file_path):
    decoded_text_str = ''.join(decoded_text)
    with open(output_file_path, 'w', encoding='utf-8') as f:
        f.write(decoded_text_str)
    print(f"Texto reconstruido guardado en '{output_file_path}'.")


input_file_path = "test_text.txt"
output_file_path = "test_text_reconstructed.txt"
# Codificar el libro en binario
book = encode_book(input_file_path)

book_env = ethernet_channel(book)
ascii_text = [int(binary, 2) for binary in book_env]


encoded_text, huffman_tree, huffman_codes = huffman_encode(ascii_text)
print(f"Los códigos Huffman son: {huffman_codes}")
print(f"Texto original: {ascii_text}")
print(f"Texto codificado: {encoded_text}")
decoded_text = huffman_decode(encoded_text, huffman_tree)
print(f"Texto decodificado: {decoded_text}")

book_decode = decode_packets(book_env)
reconstruct_book(book_decode, output_file_path)

Counter({32: 38, 101: 19, 110: 18, 97: 18, 114: 13, 111: 13, 104: 13, 116: 10, 105: 10, 115: 10, 100: 10, 119: 8, 99: 5, 108: 5, 103: 4, 121: 4, 98: 4, 10: 3, 102: 2, 78: 2, 225: 2, 118: 2, 44: 2, 80: 2, 77: 2, 107: 2, 109: 2, 46: 2, 79: 1, 117: 1})
Los códigos Huffman son: {115: '0000', 105: '0001', 116: '0010', 118: '001100', 10: '001101', 99: '00111', 100: '0100', 111: '0101', 108: '01100', 98: '011010', 46: '0110110', 225: '0110111', 104: '0111', 114: '1000', 119: '10010', 121: '100110', 107: '1001110', 77: '1001111', 102: '1010000', 109: '1010001', 103: '101001', 80: '1010100', 44: '1010101', 117: '10101100', 79: '10101101', 78: '1010111', 97: '1011', 110: '1100', 101: '1101', 32: '111'}
Texto original: [79, 110, 101, 32, 97, 102, 116, 101, 114, 110, 111, 111, 110, 32, 110, 111, 116, 105, 99, 105, 110, 103, 32, 78, 97, 116, 225, 115, 104, 97, 32, 115, 104, 105, 118, 101, 114, 105, 110, 103, 32, 119, 105, 116, 104, 32, 102, 101, 118, 101, 114, 44, 32, 80, 114, 105, 110, 99, 101, 11

In [2]:
# Python3 program for Shannon Fano Algorithm

# declare structure node
class node:
    def __init__(self) -> None:
        # for storing symbol
        self.sym = ''
        # for storing probability or frequency
        self.pro = 0.0
        self.arr = [0] * 2000
        self.top = 0

p = [node() for _ in range(2000)]

# function to find shannon code
def shannon(l, h, p):
    pack1 = 0
    pack2 = 0
    diff1 = 0
    diff2 = 0
    if ((l + 1) == h or l == h or l > h):
        if (l == h or l > h):
            return
        p[h].top += 1
        p[h].arr[(p[h].top)] = 0
        p[l].top += 1
        p[l].arr[(p[l].top)] = 1
        return
    else:
        for i in range(l, h):
            pack1 = pack1 + p[i].pro
        pack2 = pack2 + p[h].pro
        diff1 = pack1 - pack2
        if (diff1 < 0):
            diff1 = diff1 * -1
        j = 2
        while (j != h - l + 1):
            k = h - j
            pack1 = pack2 = 0
            for i in range(l, k+1):
                pack1 = pack1 + p[i].pro
            for i in range(h, k, -1):
                pack2 = pack2 + p[i].pro
            diff2 = pack1 - pack2
            if (diff2 < 0):
                diff2 = diff2 * -1
            if (diff2 >= diff1):
                break
            diff1 = diff2
            j += 1
        k += 1
        for i in range(l, k+1):
            p[i].top += 1
            p[i].arr[(p[i].top)] = 1
        for i in range(k+1, h+1):
            p[i].top += 1
            p[i].arr[(p[i].top)] = 0
        # Invoke shannon function
        shannon(l, k, p)
        shannon(k+1, h, p)

# Function to sort the symbols
# based on their probability or frequency
def sortByProbability(n, p):
    temp = node()
    for j in range(1, n):
        for i in range(n - 1):
            if ((p[i].pro) > (p[i + 1].pro)):
                temp.pro = p[i].pro
                temp.sym = p[i].sym
                p[i].pro = p[i + 1].pro
                p[i].sym = p[i + 1].sym
                p[i + 1].pro = temp.pro
                p[i + 1].sym = temp.sym

# function to display shannon codes
def display(n, p):
    print("\n\n\n\tSymbol\tProbability\tCode", end='')
    for i in range(n - 1, -1, -1):
        print("\n\t", p[i].sym, "\t\t", p[i].pro, "\t", end='')
        for j in range(p[i].top + 1):
            print(p[i].arr[j], end='')

# Function to calculate probabilities for custom symbols
def calculateProbabilities(p, symbols):
    total = len(symbols)
    probabilities = []  # Crear una lista para almacenar las probabilidades
    for i in range(total):
        p[i].sym = symbols[i]
        prob = symbols.count(symbols[i]) / total
        p[i].pro = prob
        probabilities.append(prob)  # Agregar la probabilidad a la lista
    return probabilities  # Devolver la lista de probabilidades

# Driver code
if __name__ == '__main__':
    # Input custom symbols and their probabilities
    ascii_text
    custom_probabilities = calculateProbabilities(p,ascii_text)
    
    n = len(ascii_text)
    
    # Assign custom probabilities to nodes
    for i in range(n):
        p[i].sym = ascii_text[i]
        p[i].pro = custom_probabilities[i]
    
    # Sorting the symbols based on their probability or frequency
    sortByProbability(n, p)
    
    for i in range(n):
        p[i].top = -1
    
    # Find the shannon code
    shannon(0, n - 1, p)
    
    # Display the codes
    display(n, p)




	Symbol	Probability	Code
	 32 		 0.16740088105726872 	000000
	 32 		 0.16740088105726872 	000001
	 32 		 0.16740088105726872 	000010
	 32 		 0.16740088105726872 	0000110
	 32 		 0.16740088105726872 	0000111
	 32 		 0.16740088105726872 	000100
	 32 		 0.16740088105726872 	0001010
	 32 		 0.16740088105726872 	0001011
	 32 		 0.16740088105726872 	000110
	 32 		 0.16740088105726872 	0001110
	 32 		 0.16740088105726872 	0001111
	 32 		 0.16740088105726872 	001000
	 32 		 0.16740088105726872 	0010010
	 32 		 0.16740088105726872 	0010011
	 32 		 0.16740088105726872 	001010
	 32 		 0.16740088105726872 	0010110
	 32 		 0.16740088105726872 	0010111
	 32 		 0.16740088105726872 	001100
	 32 		 0.16740088105726872 	0011010
	 32 		 0.16740088105726872 	0011011
	 32 		 0.16740088105726872 	001110
	 32 		 0.16740088105726872 	0011110
	 32 		 0.16740088105726872 	0011111
	 32 		 0.16740088105726872 	010000
	 32 		 0.16740088105726872 	0100010
	 32 		 0.16740088105726872 	0100011
	 32 		 0.1674008810