In [66]:
import pickle
import random
from collections import Counter
import binascii

In [64]:
# Plaintext pieces. They are later concatenated in this order
# (before + flag + after), lower-cased and hex-encoded to form `src`.
# NOTE: these literals are data -- the symbol counts and code table derived
# from them depend on every character, so they must not be edited casually.
src_before = '''While getting his masters degree, a professor gave his students the option of solving a difficult problem instead of taking the final exam. Opting for what he thought was the easy way out, my uncle tried to find a solution to the "smallest code" problem. What his professor didn't tell him is that no one at that time knew the best solution. As the term drew to a close, David realized he'd have to start studying for the exam and starting throwing away his scratchings on the problem. As one of the papers hit the trash can, the algorithm came to him.'''
# The flag is padded with one space on each side so it is separated from the
# surrounding sentences once the pieces are joined.
src_flag = ' flag{w0w_congrats_th1s_1s_rea11y_huffman} '
src_after = '''He published the paper "A Method for the Construction of Minimum Redundancy Codes" describing his algorithm in 1952. This became known as Huffman Coding. At the time he didn't consider copyrighting or patenting it, because was just an algorithm, and he didn't make a penny off of it. Because of its elegance and simplicity, it is described in many textbooks and several web pages. Today derivative forms of Huffman Coding can found in common electronics and web pages (for example, the Jpeg image file format).'''

In [70]:
# Join the three text pieces, lower-case the result and render its encoded
# bytes as a lowercase hex string (two hex digits per byte).
src = "".join([src_before, src_flag, src_after]).lower().encode().hex()
len(src)

2210

In [71]:
# Tally how often each hex digit occurs in `src`. Counter accepts an iterable
# directly, so no explicit counting loop is needed; keys are still inserted in
# order of first appearance.
cnt = Counter(src)

In [78]:
# Show every (hex digit, count) pair, highest count first.
cnt.most_common()

[('6', 649),
 ('7', 311),
 ('2', 274),
 ('0', 208),
 ('4', 124),
 ('5', 119),
 ('3', 94),
 ('9', 81),
 ('1', 77),
 ('e', 70),
 ('f', 69),
 ('8', 55),
 ('c', 35),
 ('d', 35),
 ('b', 6),
 ('a', 3)]

In [73]:
# Initial forest: one (weight, symbol) leaf per distinct character in `cnt`.
huffs = [(weight, symbol) for symbol, weight in cnt.items()]

# Repeatedly merge the two lightest nodes until a single root remains.
while len(huffs) > 1:
    # Shuffling before the stable sort randomises the relative order of
    # equal-weight nodes. No seed is set in this cell, so tie-breaking (and
    # therefore the resulting tree) can differ from run to run.
    random.shuffle(huffs)
    huffs.sort(key=lambda node: node[0])
    (w_a, sub_a), (w_b, sub_b) = huffs[:2]
    # The merged node keeps its two children as a list [first, second].
    huffs = huffs[2:] + [(w_a + w_b, [sub_a, sub_b])]

In [74]:
codecs = []  # filled by dfs() with (symbol, bit-string) pairs


def dfs(tree, pfx):
    """Walk a code tree depth-first and record each leaf's code in ``codecs``.

    Args:
        tree: Either a leaf (a ``str`` symbol) or an internal node, i.e. a
            two-item sequence ``[first, second]`` of sub-trees.
        pfx: Bit-string accumulated on the path from the root to ``tree``.

    Side effects:
        Appends one ``(symbol, code)`` tuple per leaf to the module-level
        ``codecs`` list. Descending into ``tree[0]`` adds ``'0'`` to the
        prefix, descending into ``tree[1]`` adds ``'1'``.
    """
    if isinstance(tree, str):
        # A leaf reached with an empty prefix means the whole tree is a single
        # symbol; give it the one-bit code '0' rather than an unusable empty
        # code. Leaves below the root always have a non-empty prefix.
        codecs.append((tree, pfx or '0'))
    else:
        dfs(tree[0], pfx + '0')
        dfs(tree[1], pfx + '1')

In [75]:
# Rebuild the code table from the root of the tree; huffs[0] is the single
# remaining (weight, tree) pair. dfs() appends to the shared `codecs` list, so
# it is cleared first -- otherwise re-running this cell would duplicate every
# entry in the table.
codecs.clear()
dfs(huffs[0][1], '')

In [76]:
# Display the generated (symbol, code) table.
codecs

[('0', '000'),
 ('3', '0010'),
 ('5', '0011'),
 ('4', '0100'),
 ('8', '01010'),
 ('f', '01011'),
 ('2', '011'),
 ('e', '10000'),
 ('1', '10001'),
 ('d', '100100'),
 ('a', '10010100'),
 ('b', '10010101'),
 ('c', '1001011'),
 ('9', '10011'),
 ('7', '101'),
 ('6', '11')]

In [79]:
# Display the hex string reversed character by character.
src[::-1]

'e2924716d627f6660256c6966602567616d69602765607a60256864702c256c607d61687560227f66682023756761607022656770246e61602373696e6f627473656c65602e6f6d6d6f63602e6960246e657f66602e616360276e69646f63602e616d6666657860266f60237d627f666025667964716679627564602971646f64702e237567616070226567702c61627566756370246e6160237b6f6f626478756470297e616d602e6960246562696273637564602379602479602c29747963696c607d696370246e616025636e6167656c656023747960266f6025637571636562602e247960266f6026666f60297e6e6560702160256b616d6024772e64696460256860246e61602c2d686479627f676c61602e61602473757a602371677025637571636562602c247960276e69647e6564716070227f60276e69647867696279707f636022756469637e6f636024772e64696460256860256d696470256864702471602e276e69646f63602e616d66666578602371602e677f6e6b60256d616365626023796864702e22353931302e69602d686479627f676c616023796860276e696269627363756460222375646f636029736e61646e65746562702d657d696e696d60266f602e6f696473657274737e6f636025686470227f6660246f6864756d60216220227560716070256864702465

In [80]:
# Persist the (symbol, code) table as a pickle file in the working directory.
with open('table.pickle', 'wb') as table_file:
    pickle.dump(codecs, table_file)

In [81]:
# Write the hex-encoded text to text.txt in the working directory.
with open('text.txt', 'w') as text_file:
    text_file.write(src)

In [82]:
# Symbol -> code lookup built from the (symbol, code) pairs.
codecs_map = dict(codecs)

In [83]:
# Code length per symbol, keyed in order of first appearance in `src`
# (dict.fromkeys keeps one entry per character, preserving that order).
lenmap = {sym: len(codecs_map[sym]) for sym in dict.fromkeys(src)}  # char: len
lines_set = set(lenmap)

# One (symbol, count, code length) row per symbol, ordered by ascending count;
# among equal counts the longer code comes first.
lines = sorted(
    ((sym, cnt[sym], width) for sym, width in lenmap.items()),
    key=lambda row: (row[1], -row[2]),
)

In [84]:
# Display the (symbol, count, code length) rows, rarest symbol first.
lines

[('a', 3, 8),
 ('b', 6, 8),
 ('c', 35, 7),
 ('d', 35, 6),
 ('8', 55, 5),
 ('f', 69, 5),
 ('e', 70, 5),
 ('1', 77, 5),
 ('9', 81, 5),
 ('3', 94, 4),
 ('5', 119, 4),
 ('4', 124, 4),
 ('0', 208, 3),
 ('2', 274, 3),
 ('7', 311, 3),
 ('6', 649, 2)]