In [75]:
from bitarray import bitarray
from collections import Counter
import math

In [63]:
def openFile(path):
    """Read the file at ``path`` in text mode and return its whole content as one string."""
    with open(path, mode="r") as handle:
        contents = handle.read()
    return contents

def decToBin(n):
    """Return the binary representation of a non-negative integer as a string.

    Args:
        n: Non-negative integer to convert.

    Returns:
        A string of "0"/"1" characters without leading zeros ("0" for zero).

    Raises:
        ValueError: If ``n`` is negative. The previous recursive version never
            terminated for negative input because ``-1 // 2 == -1``.
    """
    if n < 0:
        raise ValueError("decToBin expects a non-negative integer")
    # The built-in binary format yields the same digits as repeated division
    # by two, without recursion, so arbitrarily large values are supported.
    return format(n, "b")

def toBits(n, width=6):
    """Convert ``n`` to binary and left-pad it with zeros to a fixed width.

    Args:
        n: Number to convert; it is passed to ``decToBin`` unchanged.
        width: Minimum number of characters in the result. Defaults to 6,
            the width that used to be hard-coded here.

    Returns:
        The binary string padded on the left with "0" up to ``width``
        characters. Longer representations are returned unchanged; nothing
        is ever truncated.
    """
    bits = decToBin(n)
    # rjust only pads, exactly like repeatedly prepending "0" until long enough.
    return bits.rjust(width, "0")

In [64]:
class drzewo:
    """Node of a binary tree with a left child, a right child and a value slot."""

    def __init__(self, lewy = None, prawy = None):
        # lewy / prawy: left and right child, stored exactly as given.
        self.lewy, self.prawy = lewy, prawy
        # wartosc always starts as None; the constructor takes no value for it.
        self.wartosc = None

    def __str__(self):
        """Return the node rendered as "(left | right)"; children are formatted recursively."""
        return "({} | {})".format(self.lewy, self.prawy)

In [65]:
def generowanieKodu(node, prefix="", codebook=None):
    """Build the symbol -> code dictionary for the tree rooted at ``node``.

    Args:
        node: A ``drzewo`` instance, or a bare symbol (anything that is not a
            ``drzewo`` is treated as a leaf).
        prefix: Code accumulated on the way down to ``node``.
        codebook: Dictionary to fill in; a new one is created when omitted.

    Returns:
        ``codebook``, mapping every leaf to its string of "0"/"1" characters.
    """
    if codebook is None:
        codebook = {}

    # Explicit stack instead of recursion. The right child is pushed first so
    # the left subtree is processed first and leaves are inserted left to right.
    pending = [(node, prefix)]
    while pending:
        current, path = pending.pop()
        if isinstance(current, drzewo):
            pending.append((current.prawy, path + "1"))  # right branch adds "1"
            pending.append((current.lewy, path + "0"))   # left branch adds "0"
        else:
            # A leaf reached with an empty path means the tree is a single
            # symbol; it still needs a non-empty code, so it gets "0".
            codebook[current] = path or "0"

    return codebook


In [66]:
def tworzenieDrzewa(letters):
    """Build a Huffman tree from (symbol, frequency) pairs and return its root.

    The two lowest-frequency entries are repeatedly merged into a new
    ``drzewo`` node whose ``wartosc`` is the sum of their frequencies, until a
    single entry is left.

    Args:
        letters: Sequence of ``(symbol, frequency)`` pairs. It does not have to
            be sorted and is not modified.

    Returns:
        The root ``drzewo`` node, or the bare symbol itself when ``letters``
        holds exactly one pair (``generowanieKodu`` treats a non-``drzewo``
        root as a single leaf).

    Raises:
        ValueError: If ``letters`` is empty.
    """
    # Work on a sorted copy: the caller's list stays untouched and the first
    # merge no longer depends on the input being pre-sorted. sorted() is
    # stable, so already-sorted input keeps its exact order.
    queue = sorted(letters, key=lambda item: item[1])
    if not queue:
        raise ValueError("tworzenieDrzewa needs at least one (symbol, frequency) pair")

    while len(queue) > 1:
        first, second = queue[0], queue[1]

        # Merge the two rarest entries under a new internal node.
        node = drzewo(first[0], second[0])
        node.wartosc = first[1] + second[1]

        # Replace the merged pair with the new node. Appending it last before
        # the stable sort places it after existing entries of equal weight.
        queue = sorted(queue[2:] + [(node, node.wartosc)], key=lambda item: item[1])

    return queue[0][0]

In [67]:
def create(text):
    """Build a Huffman code for ``text`` from its symbol frequencies.

    Args:
        text: Iterable of hashable symbols, typically a string.

    Returns:
        A dictionary mapping each distinct symbol to its code, a string of
        "0"/"1" characters. Empty input gives an empty dictionary and input
        with a single distinct symbol gives that symbol the code "0".
    """
    # Counter keeps first-seen order and sorted() is stable, so symbols with
    # equal counts stay in order of first appearance.
    letters = sorted(Counter(text).items(), key=lambda item: item[1])

    # Degenerate alphabets are handled here so the result does not depend on
    # how the tree builder treats fewer than two symbols.
    if not letters:
        return {}
    if len(letters) == 1:
        return {letters[0][0]: "0"}

    node = tworzenieDrzewa(letters)
    return generowanieKodu(node)

In [68]:
def encode(text, kod):
    """Encode ``text`` by concatenating the code of every symbol.

    Args:
        text: Iterable of symbols; each one must be a key of ``kod``.
        kod: Mapping from symbol to its code string.

    Returns:
        A ``bitarray`` holding the codes of all symbols in input order.
    """
    bits = bitarray()
    # Join all codes first, then append them to the bitarray in one call.
    bits.extend("".join(kod[symbol] for symbol in text))
    return bits

In [69]:
def decode(bits, kod):
    """Decode a bit sequence back into text.

    Args:
        bits: Object with a ``to01()`` method that returns the bits as a
            string of "0"/"1" characters (e.g. a ``bitarray``).
        kod: Mapping from symbol to its code string.

    Returns:
        The decoded symbols joined into one string. Bits left over at the end
        that do not complete a code are ignored.
    """
    # Invert the code table: code string -> symbol.
    symbol_for = {}
    for symbol, codeword in kod.items():
        symbol_for[codeword] = symbol

    output = []
    pending = ""
    for bit in bits.to01():
        pending = pending + bit
        try:
            symbol = symbol_for[pending]
        except KeyError:
            # Not a complete code yet; keep collecting bits.
            continue
        output.append(symbol)
        pending = ""

    return "".join(output)

In [70]:
# Prefix of the optional first line of kod.txt that records how many bits of
# text.bin are real data. It cannot be confused with a symbol record, because
# those always have a space right after the (single-character) symbol.
_BIT_COUNT_PREFIX = "bits="


def save(kod, text):
    """Write the code table to ``kod.txt`` and the encoded bits to ``text.bin``.

    ``kod.txt`` starts with a ``bits=<n>`` line holding the exact number of
    encoded bits, followed by one ``<symbol> <code>`` record per symbol.
    ``text.bin`` holds the raw bytes of the bit sequence; ``tofile`` pads the
    last byte, which is why the exact bit count is stored separately.

    Args:
        kod: Mapping from symbol to its code string.
        text: The encoded bits (a ``bitarray``), despite the parameter name.
    """
    with open("kod.txt", "w") as f:
        f.write(f"{_BIT_COUNT_PREFIX}{len(text)}\n")
        for symbol, code in kod.items():
            f.write(f"{symbol} {code}\n")
    with open("text.bin", "wb") as f:
        text.tofile(f)


def load():
    """Read back the code table and encoded bits written by ``save``.

    Records are parsed by position rather than with ``str.split``, so symbols
    that are themselves whitespace (space, tab, newline) are restored
    correctly. Files without the ``bits=<n>`` line are still accepted; their
    bit sequence then keeps whatever padding ``text.bin`` contains.

    Returns:
        A ``(kod, text)`` tuple: the symbol -> code dictionary and the
        ``bitarray`` of encoded bits.

    Raises:
        ValueError: If a record in ``kod.txt`` is malformed or the stored bit
            count does not fit the data in ``text.bin``.
    """
    kod = {}
    bit_count = None
    with open("kod.txt", "r") as f:
        content = f.read()

    pos = 0
    while pos < len(content):
        # Look for the record terminator only after the first character: that
        # character is the symbol itself and may be a newline.
        end = content.find("\n", pos + 1)
        if end == -1:
            end = len(content)
        record = content[pos:end]
        pos = end + 1

        if record.startswith(_BIT_COUNT_PREFIX):
            bit_count = int(record[len(_BIT_COUNT_PREFIX):])
            continue
        if not record.strip():
            # Blank line: nothing to parse.
            continue

        # Codes never contain spaces, so the last space separates symbol and code.
        symbol, separator, code = record.rstrip().rpartition(" ")
        if not separator or not symbol:
            raise ValueError(f"malformed record in kod.txt: {record!r}")
        kod[symbol] = code

    text = bitarray()
    with open("text.bin", "rb") as f:
        text.fromfile(f)

    if bit_count is not None:
        if not 0 <= bit_count <= len(text):
            raise ValueError("bit count in kod.txt does not match text.bin")
        # Drop the padding bits that tofile added to fill the last byte.
        del text[bit_count:]
    return kod, text

In [71]:
# Read the sample file and preview its first 1000 characters.
# `text` is the input that the following cells encode and analyse.
path = "sample.txt"
text = openFile(path)
print(text[:1000])

 albert of prussia 17 may 1490 20 march 1568 was the last grand master of the teutonic knights who after converting to lutheranism became the first monarch of the duchy of prussia the secularized state that emerged from the former monastic state of the teutonic knights albert was the first european ruler to establish protestantism as the official state religion of his lands he proved instrumental in the political spread of protestantism in its early stage ruling the prussian lands for nearly six decades 15101568 a member of the brandenburg ansbach branch of the house of hohenzollern albert s election as grand master had brought about hopes of a reversal of the declining fortune of the teutonic knights he was a skilled political administrator and leader and did indeed reverse the decline of the teutonic order however albert who was sympathetic to the demands of martin luther rebelled against the catholic church and the holy roman empire by converting the teutonic state into a protestant

In [74]:
# Build the code from the text, then encode and immediately decode it as a
# round-trip check.
kod = create(text)
zakodowane = encode(text, kod)
odkodowane = decode(zakodowane, kod)
# Preview the first 100 encoded bits and the first 100 decoded characters.
print("zakodowane: ", zakodowane[:100])
print("odkodowane: ", odkodowane[:100])
# Prints True when decoding reproduced the input exactly.
print("czy odkodowane jest takie samo jak oryginalne: ", odkodowane == text)

zakodowane:  bitarray('1111100101110100010000101101011101101011011111011000101110111001100111000110011111011011110110101111')
odkodowane:   albert of prussia 17 may 1490 20 march 1568 was the last grand master of the teutonic knights who a
czy odkodowane jest takie samo jak oryginalne:  True


In [78]:
# 1. Symbol counts and their relative frequencies (probabilities)
freq = Counter(text)
N = sum(freq.values())
probabilities = {sym: count / N for sym, count in freq.items()}

# 2. Average code length L: code lengths weighted by symbol probability
avg_length = sum(p * len(kod[sym]) for sym, p in probabilities.items())

# 3. Entropy H of the symbol distribution (base-2 logarithm)
entropy = -sum(p * math.log2(p) for p in probabilities.values())

# 4. Efficiency η = H / L
efficiency = entropy / avg_length

print(f"Średnia długość kodu: {avg_length:.4f} bitów")
print(f"Entropia źródła H: {entropy:.4f} bitów")
print(f"Efektywność kodowania η: {efficiency*100:.2f}%")

Średnia długość kodu: 4.3090 bitów
Entropia źródła H: 4.2804 bitów
Efektywność kodowania η: 99.34%
