In [151]:
from heapq import heappush,heappop
import pickle

In [48]:
class Node:
    """A vertex of the Huffman tree.

    Leaves hold a character in ``char``; merged (internal) vertices are built
    with ``char=None``. ``freq`` is the weight used to order nodes, and
    ``left`` / ``right`` are the child links, both ``None`` until assigned.
    """

    def __init__(self, char, freq):
        self.char, self.freq = char, freq
        self.left = self.right = None

    def __lt__(self, other):
        # Ordering by weight lets nodes break ties when they sit inside heap tuples.
        return self.freq < other.freq

    def __repr__(self):
        return "Node({},{})".format(self.char, self.freq)

In [161]:
def encode(inputfile,encodedfile):
    """Huffman-compress a text file.

    Reads ``inputfile`` as text, builds a Huffman tree from its character
    frequencies and writes two artefacts:

    * ``encodedfile`` -- one header byte holding the number of zero padding
      bits (always 1..8), followed by the code bits packed into bytes.
    * ``huffmantree.pkl`` in the current working directory -- the pickled
      tree root, which ``decode`` loads to reverse the encoding.

    Empty input and input made of a single distinct character are supported.

    Args:
        inputfile: Path of the text file to compress.
        encodedfile: Path the compressed bytes are written to.
    """
    with open(inputfile,'r') as file:
        text = file.read()

    # Count how often each character occurs.
    freqdict = {}
    for char in text:
        freqdict[char] = freqdict.get(char, 0) + 1

    # Min-heap of (weight, node); Node.__lt__ breaks ties between equal weights.
    heap = []
    for char,freq in freqdict.items():
        heappush(heap,(freq,Node(char,freq)))

    # Repeatedly merge the two lightest subtrees until one tree remains.
    while len(heap) > 1:
        node1 = heappop(heap)[1]
        node2 = heappop(heap)[1]
        sumval = node1.freq + node2.freq
        treenode = Node(None,sumval)
        treenode.left = node1
        treenode.right = node2
        heappush(heap,(sumval,treenode))

    if not heap:
        # Empty input: there is nothing to pop, so use a childless root.
        # The codebook stays empty and no code bits are produced.
        top = Node(None,0)
    else:
        top = heap[0][1]
        if top.char is not None:
            # Only one distinct character: a bare leaf would receive the
            # empty code and the repeat count would be lost. Hang the leaf
            # under a parent so the symbol gets the one-bit code '0'.
            leaf = top
            top = Node(None,leaf.freq)
            top.left = leaf

    codebook = {}
    def traversetree(tree,code):
        """Record the root-to-leaf bit path of every leaf under ``tree``."""
        if tree is None:
            return
        if tree.char is not None:
            codebook[tree.char] = code
        else:
            traversetree(tree.left,code+'0')
            traversetree(tree.right,code+'1')

    traversetree(top,'')

    # Join once instead of growing a string character by character.
    encoded = ''.join(codebook[char] for char in text)

    # Pad with zeros to a byte boundary. The count is 1..8 (a full byte when
    # already aligned), which is the layout the header byte describes.
    padding = 8 - (len(encoded)%8)
    encoded += '0'*padding
    packed = bytes(int(encoded[i:i+8],2) for i in range(0,len(encoded),8))

    with open(encodedfile,'wb') as file:
        file.write(bytes([padding]))
        file.write(packed)
    with open('huffmantree.pkl','wb') as file:
        pickle.dump(top,file)

In [165]:
def decode(inpath,outpath):
    """Rebuild the original text from a Huffman-compressed file.

    Expects the layout written by ``encode``: a first byte giving the number
    of zero padding bits appended to the bit stream, followed by the packed
    code bits. The tree is loaded from ``huffmantree.pkl`` in the current
    working directory.

    Args:
        inpath: Path of the compressed file.
        outpath: Path the decoded text is written to.

    Raises:
        ValueError: If ``inpath`` is empty and therefore has no header byte.
    """
    # SECURITY: pickle.load can execute arbitrary code. Only load a tree file
    # that was produced by a trusted run of encode.
    with open('huffmantree.pkl','rb') as file:
        top = pickle.load(file)
    with open(inpath,'rb') as f:
        header = f.read(1)
        if not header:
            raise ValueError("encoded file is empty: missing padding header byte")
        padding = header[0]
        encoded = ''.join(format(byte, '08b') for byte in f.read())

    # Drop exactly the padding bits. An explicit end index is used because a
    # negative-offset slice degenerates (e.g. to an empty string) for small
    # padding values such as 0 or 1.
    encoded = encoded[:max(len(encoded) - padding, 0)]

    node = top
    pieces = []
    for bit in encoded:
        # Follow the edge first, then emit as soon as a leaf is reached, so
        # the final symbol does not depend on a trailing extra bit.
        node = node.right if bit == '1' else node.left
        if node.char is not None:
            pieces.append(node.char)
            node = top
    decoded = ''.join(pieces)

    with open(outpath,'w') as file:
        file.write(decoded)

In [168]:
# Compress the sample text; encode also writes huffmantree.pkl to the working directory.
encode(inputfile='sherlock.txt', encodedfile='encoded.huff')

In [169]:
# Expand the compressed file back to text; decode reads huffmantree.pkl from the working directory.
decode(inpath='encoded.huff', outpath='decoded.txt')