In [7]:
from collections import Counter
import numpy as np
import komm

In [8]:
def contar_caracteres(arquivo):
    try:
        with open(arquivo, 'r', encoding='utf-8') as f:
            texto = f.read()
        
        contador = Counter(texto)
        total_caracteres = sum(contador.values())
        
        pmf = {char: quantidade / total_caracteres for char, quantidade in contador.items()}
        
        return contador, pmf
    except FileNotFoundError:
        print("Arquivo não encontrado.")
    except Exception as e:
        print(f"Ocorreu um erro: {e}")


In [9]:
# Substitua 'livro.txt' pelo caminho do arquivo de texto que deseja analisar
contador, pmf = contar_caracteres('pg11.txt')

# Ordenar a PMF em ordem decrescente
sorted_pmf = sorted(pmf.items(), key=lambda item: item[1], reverse=True)

# Exibir contagem de caracteres
print("Contagem de caracteres:")
for char, quantidade in sorted(contador.items()):
    print(f'{char}: {quantidade}')

# Exibir PMF
print("\nPMF:")
for char, prob in sorted_pmf:
    print(f'{char}: {prob:.6f}')


Contagem de caracteres:

: 3757
 : 27601
!: 452
#: 1
$: 2
%: 1
': 4
(: 73
): 73
*: 72
,: 2569
-: 158
.: 1223
/: 6
0: 21
1: 58
2: 11
3: 12
4: 9
5: 9
6: 7
7: 5
8: 10
9: 7
:: 246
;: 193
?: 204
A: 670
B: 118
C: 181
D: 206
E: 193
F: 123
G: 182
H: 245
I: 784
J: 13
K: 81
L: 111
M: 196
N: 136
O: 146
P: 180
Q: 84
R: 165
S: 229
T: 482
U: 70
V: 26
W: 226
X: 10
Y: 100
Z: 1
[: 4
]: 4
_: 440
a: 9167
b: 1635
c: 2846
d: 5272
e: 15287
f: 2255
g: 2768
h: 7677
i: 7856
j: 223
k: 1217
l: 5102
m: 2210
n: 7935
o: 9372
p: 1795
q: 139
r: 6491
s: 7041
t: 11740
u: 3921
v: 943
w: 2745
x: 170
y: 2503
z: 78
ù: 1
—: 265
‘: 47
’: 712
“: 1129
”: 1125
•: 4
™: 57
﻿: 1

PMF:
 : 0.168382
e: 0.093259
t: 0.071621
o: 0.057175
a: 0.055924
n: 0.048408
i: 0.047926
h: 0.046834
s: 0.042954
r: 0.039599
d: 0.032162
l: 0.031125
u: 0.023920

: 0.022920
c: 0.017362
g: 0.016886
w: 0.016746
,: 0.015672
y: 0.015270
f: 0.013757
m: 0.013482
p: 0.010951
b: 0.009974
.: 0.007461
k: 0.007424
“: 0.006888
”: 0.006863
v: 0.005753
I: 0.004783
’: 0

In [10]:
# Criar código de Huffman
probs = [prob for _, prob in sorted_pmf]
huffman = komm.HuffmanCode(probs)

# Exibir código de Huffman
print("\nCódigo de Huffman:")
for (char, _), codeword in zip(sorted_pmf, huffman.codewords):
    print(f'{char}: {codeword}')



Código de Huffman:
 : (0, 0, 1)
e: (1, 1, 1)
t: (0, 1, 0, 0)
o: (1, 0, 0, 0)
a: (1, 0, 0, 1)
n: (1, 0, 1, 1)
i: (1, 1, 0, 0)
h: (1, 1, 0, 1)
s: (0, 0, 0, 0, 1)
r: (0, 0, 0, 1, 1)
d: (0, 1, 1, 0, 0)
l: (0, 1, 1, 0, 1)
u: (1, 0, 1, 0, 1)

: (0, 0, 0, 0, 0, 1)
c: (0, 1, 0, 1, 0, 0)
g: (0, 1, 0, 1, 0, 1)
w: (0, 1, 0, 1, 1, 0)
,: (0, 1, 1, 1, 0, 0)
y: (0, 1, 1, 1, 0, 1)
f: (0, 1, 1, 1, 1, 1)
m: (1, 0, 1, 0, 0, 1)
p: (0, 0, 0, 1, 0, 0, 0)
b: (0, 0, 0, 1, 0, 1, 0)
.: (0, 1, 1, 1, 1, 0, 0)
k: (0, 1, 1, 1, 1, 0, 1)
“: (1, 0, 1, 0, 0, 0, 0)
”: (1, 0, 1, 0, 0, 0, 1)
v: (0, 0, 0, 0, 0, 0, 1, 0)
I: (0, 0, 0, 1, 0, 1, 1, 0)
’: (0, 1, 0, 1, 1, 1, 0, 0)
A: (0, 1, 0, 1, 1, 1, 0, 1)
T: (0, 0, 0, 0, 0, 0, 0, 1, 0)
!: (0, 0, 0, 0, 0, 0, 1, 1, 1)
_: (0, 0, 0, 1, 0, 0, 1, 0, 0)
—: (0, 0, 0, 0, 0, 0, 0, 0, 0, 1)
:: (0, 0, 0, 0, 0, 0, 0, 0, 1, 1)
H: (0, 0, 0, 0, 0, 0, 0, 1, 1, 0)
S: (0, 0, 0, 0, 0, 0, 1, 1, 0, 0)
W: (0, 0, 0, 0, 0, 0, 1, 1, 0, 1)
j: (0, 0, 0, 1, 0, 0, 1, 0, 1, 0)
D: (0, 0, 0, 1, 0, 0, 1, 1, 